diff --git a/.gitattributes b/.gitattributes index 60ed5557898298dcc0c8edefbc089b87818a1d10..d3187730bf3de2949c33574816b048ecfe8f4395 100644 --- a/.gitattributes +++ b/.gitattributes @@ -359,3 +359,24 @@ sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mm sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mmerealworld_lite.json filter=lfs diff=lfs merge=lfs -text sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/submissions/mmbench_en_dev_results.xlsx filter=lfs diff=lfs merge=lfs -text sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/textvqa_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json filter=lfs diff=lfs merge=lfs -text +sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json filter=lfs diff=lfs merge=lfs -text diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2142_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2142_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2142_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3e5c7ecd1fd051ff210a79f69ad980d587fd5b3 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt @@ -0,0 +1 @@ +rank 2 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a4b44254d394e29e04b2a41d91f6dc025b8afad --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt @@ -0,0 +1 @@ +rank 3 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..adc7b206bcb2b625b0bd1076bcdac57f97cb5a84 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3bab9cc5ed7c089d9dc6fd7b61bbefdf945d5ef635cdf5121500ac690836c9e +size 36750400 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3e5c7ecd1fd051ff210a79f69ad980d587fd5b3 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt @@ -0,0 +1 @@ +rank 2 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a4b44254d394e29e04b2a41d91f6dc025b8afad --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt @@ -0,0 +1 @@ +rank 3 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f155790f574e42d514a217cffe776d470e09555 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/results.json @@ -0,0 +1,66 @@ +{ + "results": { + "mmmu_val": { + "mmmu_acc,none": 0.39667, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + } + }, + "configs": { + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + } + }, + "versions": { + "mmmu_val": "Yaml" + }, + "n-shot": { + "mmmu_val": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-4159,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json new file mode 100644 index 0000000000000000000000000000000000000000..abff483508013583c81128b9f6b994453e33f48c --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea24a08322e3d4261f46343ad938d9cee0240e86f63937df3a56af0f35b31db +size 45280348 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json new file mode 100644 index 0000000000000000000000000000000000000000..087e953c8a5eec6cd84b11e603a07c289d7dd966 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ff23a00ca67cbf25c3913c38b6130c53471ff8ede1639ac638afab6dd079a0 +size 94629687 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..e755da337d7eb86beca664c029a10535d005fe01 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6066f4915e0fe06637da76ce56045cb5c220d5f17687c9fa4c81f604f31555ad +size 36750432 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json new file mode 100644 index 0000000000000000000000000000000000000000..79d75cb9bfd3b1b187160ceee3bc7f93c504a993 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59eae9c9fd88fdf94ee4899dba7f649d4538bce9a912b884dcfe62d72d81e9f +size 60426711 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/results.json new file mode 100644 index 0000000000000000000000000000000000000000..47b0ebbe45160a839d63e1d9d51f51ff7834134e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/results.json @@ -0,0 +1,285 @@ +{ + "results": { + "mathvista_testmini": { + "gpt_eval_score,none": 24.4, + "gpt_eval_score_stderr,none": "N/A", + "alias": "mathvista_testmini" + }, + "mme": { + "mme_cognition_score,none": 300.3571428571429, + "mme_cognition_score_stderr,none": "N/A", + "mme_percetion_score,none": 1333.4614845938377, + "mme_percetion_score_stderr,none": "N/A", + "alias": "mme" + }, + "mmmu_val": { + "mmmu_acc,none": 0.39667, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + }, + "mmstar": { + "coarse perception,none": 0.6777537264839942, + "coarse perception_stderr,none": "N/A", + "fine-grained perception,none": 0.3289425202652911, + "fine-grained perception_stderr,none": "N/A", + "instance reasoning,none": 0.5032956116682105, + "instance reasoning_stderr,none": "N/A", + "logical reasoning,none": 0.37952872210297955, + "logical reasoning_stderr,none": "N/A", + "math,none": 0.2650193798449612, + "math_stderr,none": "N/A", + "science & technology,none": 0.2725281195018929, + "science & technology_stderr,none": "N/A", + "alias": "mmstar" + } + }, + "configs": { + "mathvista_testmini": { + "task": "mathvista_testmini", + "dataset_path": "AI4Math/MathVista", + "dataset_kwargs": { + "token": true + }, + "test_split": "testmini", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "gpt_eval_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "ASSISTANT:" + ], + "max_new_tokens": 1024, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "model_specific_prompt_kwargs": { + "default": { + "shot_type": "format-prompt", + "shot": 0, + "use_caption": false, + "use_ocr": false + }, + "phi3v": { + "shot_type": "solution" + } + }, + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mme": { + "task": "mme", + "dataset_path": "lmms-lab/MME", + "dataset_kwargs": { + "token": false + }, + "test_split": "test", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mme_percetion_score", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "mme_cognition_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 16, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "until": [ + "\n\n" + ] + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question using a single word or phrase." + }, + "gpt4v": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question with Yes or No." + }, + "qwen_vl": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "otterhd": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "xcomposer2_4khd": { + "pre_prompt": "[UNUSED_TOKEN_146]user\n", + "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n" + } + } + }, + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mmstar": { + "task": "mmstar", + "dataset_path": "Lin-Chen/MMStar", + "dataset_kwargs": { + "token": true + }, + "test_split": "val", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "coarse perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "fine-grained perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "instance reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "logical reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "science & technology", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "math", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "\n\n" + ], + "do_sample": false + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer with the option's letter from the given choices directly" + } + } + } + }, + "versions": { + "mathvista_testmini": "Yaml", + "mme": "Yaml", + "mmmu_val": "Yaml", + "mmstar": "Yaml" + }, + "n-shot": { + "mathvista_testmini": 0, + "mme": 0, + "mmmu_val": 0, + "mmstar": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-4159,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/submissions/mathvista_testmini_scores.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/submissions/mathvista_testmini_scores.json new file mode 100644 index 0000000000000000000000000000000000000000..2ca9924f0d76f423a70e5b351de6cfabe4a1dec6 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/submissions/mathvista_testmini_scores.json @@ -0,0 +1,26873 @@ +{ + "1": { + "question_id": "1", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", + "choices": null, + "answer": "1.2", + "extraction": "0.1", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 720, + "img_width": 1514, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "3": { + "question_id": "3", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u25b3ABC\u7684\u4e24\u5185\u89d2\u5e73\u5206\u7ebfOB\u3001OC\u76f8\u4ea4\u4e8e\u70b9O\uff0c\u82e5\u2220A\uff1d110\u00b0\uff0c\u5219\u2220BOC\uff1d\uff08\uff09\nChoices:\n(A) 135\u00b0\n(B) 140\u00b0\n(C) 145\u00b0\n(D) 150\u00b0", + "choices": [ + "135\u00b0", + "140\u00b0", + "145\u00b0", + "150\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "135\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 60, + "img_width": 131, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "5": { + "question_id": "5", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m\\angle H$\nChoices:\n(A) 97\n(B) 102\n(C) 107\n(D) 122", + "choices": [ + "97", + "102", + "107", + "122" + ], + "answer": "97", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "97", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 245, + "img_width": 322, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "7": { + "question_id": "7", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "9": { + "question_id": "9", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u662f\u4e00\u682a\u7f8e\u4e3d\u7684\u52fe\u80a1\u6811\uff0c\u5176\u4e2d\u6240\u6709\u56db\u8fb9\u5f62\u90fd\u662f\u6b63\u65b9\u5f62\uff0c\u6240\u6709\u7684\u4e09\u89d2\u5f62\u90fd\u662f\u76f4\u89d2\u4e09\u89d2\u5f62\uff0c\u82e5\u6b63\u65b9\u5f62A\u3001B\u7684\u9762\u79ef\u5206\u522b\u4e3a5\u30013\uff0c\u5219\u6700\u5927\u6b63\u65b9\u5f62C\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 15\n(B) 13\n(C) 11\n(D) 8", + "choices": [ + "15", + "13", + "11", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 155, + "img_width": 134, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "11": { + "question_id": "11", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all tiny matte balls. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "13": { + "question_id": "13", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 90 percent of people in at least one category?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "15": { + "question_id": "15", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism with be most affected if algae was eliminated?\nChoices:\n(A) Tilapia\n(B) Common water flea\n(C) Great diving beetle\n(D) Tadpole", + "choices": [ + "Tilapia", + "Common water flea", + "Great diving beetle", + "Tadpole" + ], + "answer": "Common water flea", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Tilapia", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 232, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "17": { + "question_id": "17", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0cD\u662fAB\u7684\u4e2d\u70b9\uff0cAB\uff1d10\uff0c\u5219CD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10", + "choices": [ + "5", + "6", + "8", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 172, + "img_width": 125, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "19": { + "question_id": "19", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest amount this class measures?", + "choices": null, + "answer": "400", + "extraction": "400", + "prediction": "400", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 684, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "21": { + "question_id": "21", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "23": { + "question_id": "23", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=2 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "25": { + "question_id": "25", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the smoothest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 770, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "27": { + "question_id": "27", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "11", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1752, + "img_width": 2628, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "29": { + "question_id": "29", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 440, + "img_width": 670, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "31": { + "question_id": "31", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more big red rubber double buss in front of the large red double bus than big green things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "33": { + "question_id": "33", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()\nChoices:\n(A) 2\u03c0cm\n(B) 3\u03c0cm\n(C) 4\u03c0cm\n(D) 5\u03c0cm", + "choices": [ + "2\u03c0cm", + "3\u03c0cm", + "4\u03c0cm", + "5\u03c0cm" + ], + "answer": "4\u03c0cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2\u03c0cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 331, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "35": { + "question_id": "35", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cEF\uff0cEB\u662f\u2299O\u7684\u5f26\uff0c\u70b9E\u662fFEB\u7684\u4e2d\u70b9\uff0cEF\u4e0eAB\u4ea4\u4e8e\u70b9C\uff0c\u8fde\u63a5OF\uff0c\u82e5\u2220AOF\uff1d40\u00b0\uff0c\u5219\u2220F\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 55\u00b0", + "choices": [ + "20\u00b0", + "35\u00b0", + "40\u00b0", + "55\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "37": { + "question_id": "37", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit as x approaches -1?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 410, + "img_width": 408, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "39": { + "question_id": "39", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function odd or even?\nChoices:\n(A) odd\n(B) even", + "choices": [ + "odd", + "even" + ], + "answer": "odd", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "odd", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 304, + "img_width": 433, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "41": { + "question_id": "41", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 3491, + "img_width": 5236, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "43": { + "question_id": "43", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the wettest on average in Christchurch?\nChoices:\n(A) August\n(B) April\n(C) May", + "choices": [ + "August", + "April", + "May" + ], + "answer": "May", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "August", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 449, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "45": { + "question_id": "45", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An administrator at the Department of Motor Vehicles (DMV) tracked the average wait time from month to month. According to the table, what was the rate of change between August and September? (Unit: minutes per month)", + "choices": null, + "answer": "-3", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "47": { + "question_id": "47", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all rubber balls. Subtract all yellow shiny things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "49": { + "question_id": "49", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the digits on either end of the sign in the corner?", + "choices": null, + "answer": "0", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 476, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "51": { + "question_id": "51", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber objects in front of the small yellow aeroplane greater than the number of big cyan matte fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "53": { + "question_id": "53", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 593, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "55": { + "question_id": "55", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e00\u5757\u76f4\u89d2\u4e09\u89d2\u677f60\u00b0\u7684\u89d2\u7684\u9876\u70b9A\u4e0e\u76f4\u89d2\u9876\u70b9C\u5206\u522b\u5728\u4e24\u5e73\u884c\u7ebfFG\uff0cDE\u4e0a\uff0c\u659c\u8fb9AB\u5e73\u5206\u2220CAG\uff0c\u4ea4\u76f4\u7ebfDE\u4e8e\u70b9H\uff0c\u5219\u2220BCH\u7684\u5927\u5c0f\u4e3a\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 45\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "60\u00b0", + "45\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "30\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 175, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "57": { + "question_id": "57", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small balls. Subtract all blue rubber things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "59": { + "question_id": "59", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, CD is the chord of \u2299O, \u2220ADC = 26.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 26\u00b0\n(B) 74\u00b0\n(C) 64\u00b0\n(D) 54\u00b0", + "choices": [ + "26\u00b0", + "74\u00b0", + "64\u00b0", + "54\u00b0" + ], + "answer": "64\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "61": { + "question_id": "61", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 427, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "63": { + "question_id": "63", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red matte cubes. Subtract all small green metal objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "65": { + "question_id": "65", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is f(3) > 0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "67": { + "question_id": "67", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the square?", + "choices": null, + "answer": "16", + "extraction": "16", + "prediction": "16", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "69": { + "question_id": "69", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big matte balls. Subtract all green rubber objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "71": { + "question_id": "71", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "18", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "73": { + "question_id": "73", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the matrix.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 654, + "img_width": 387, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "75": { + "question_id": "75", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "77": { + "question_id": "77", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year showed the largest difference in the data points between the two lines", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "79": { + "question_id": "79", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If \u2220ABC = 60.0, then the degree of \u2220CDE is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 104, + "img_width": 123, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "81": { + "question_id": "81", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of r at theta=3*pi/2?", + "choices": null, + "answer": "-1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 460, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "83": { + "question_id": "83", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shiny buss less than the number of matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "85": { + "question_id": "85", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many countries have people working for more than 35 hours over the years?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "87": { + "question_id": "87", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $790, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "89": { + "question_id": "89", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 384, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "91": { + "question_id": "91", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown suvs less than the number of brown rubber school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "93": { + "question_id": "93", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What's the computing and wirless total for semiconductor demand in 2014?", + "choices": null, + "answer": "197.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "95": { + "question_id": "95", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects \u2220AOE, \u2220BOC = 50.0, then \u2220EOB = ()\nChoices:\n(A) 50\u00b0\n(B) 60\u00b0\n(C) 70\u00b0\n(D) 80\u00b0", + "choices": [ + "50\u00b0", + "60\u00b0", + "70\u00b0", + "80\u00b0" + ], + "answer": "80\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 162, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "97": { + "question_id": "97", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 9?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "99": { + "question_id": "99", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cat is larger?\nChoices:\n(A) white five\n(B) white three\n(C) white four\n(D) white one\n(E) white two", + "choices": [ + "white five", + "white three", + "white four", + "white one", + "white two" + ], + "answer": "white one", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "white five", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "101": { + "question_id": "101", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which shape is most erect?\nChoices:\n(A) Lanceolate\n(B) Heart-shaped\n(C) Linear\n(D) Spatulate", + "choices": [ + "Lanceolate", + "Heart-shaped", + "Linear", + "Spatulate" + ], + "answer": "Linear", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lanceolate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1204, + "img_width": 376, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "103": { + "question_id": "103", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple matte blocks. Subtract all blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "105": { + "question_id": "105", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 727, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "107": { + "question_id": "107", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "109": { + "question_id": "109", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny balls. Subtract all green metallic things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "111": { + "question_id": "111", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big gray matte things. Subtract all small metallic cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "113": { + "question_id": "113", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many baseballs are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 458, + "img_width": 721, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "115": { + "question_id": "115", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1079, + "img_width": 826, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "117": { + "question_id": "117", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the range of this function?\nChoices:\n(A) [0, 2]\n(B) [3, 2]\n(C) [2, 4]\n(D) [-3, 4]", + "choices": [ + "[0, 2]", + "[3, 2]", + "[2, 4]", + "[-3, 4]" + ], + "answer": "[0, 2]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "[0, 2]", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 460, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "119": { + "question_id": "119", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, P is a point outside \u2299O, PA and PB intersect \u2299O at two points C and D respectively. It is known that the central angles of \u2040AB and \u2040CD are 90.0 and 50.0 respectively, then \u2220P = ()\nChoices:\n(A) 45\u00b0\n(B) 40\u00b0\n(C) 25\u00b0\n(D) 20\u00b0", + "choices": [ + "45\u00b0", + "40\u00b0", + "25\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 103, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "121": { + "question_id": "121", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In trying to calculate how much money could be saved by packing lunch, Manny recorded the amount he spent on lunch each day. According to the table, what was the rate of change between Wednesday and Thursday? (Unit: $, per day)", + "choices": null, + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "123": { + "question_id": "123", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram represents successive rotations, starting from the top down. Which shape comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 579, + "img_width": 412, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "125": { + "question_id": "125", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if caterpillars decrease?\nChoices:\n(A) plants decrease\n(B) plants increase\n(C) nothing happens\n(D) none of the above", + "choices": [ + "plants decrease", + "plants increase", + "nothing happens", + "none of the above" + ], + "answer": "plants increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plants decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "127": { + "question_id": "127", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "129": { + "question_id": "129", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "131": { + "question_id": "131", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have value below 40?", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "133": { + "question_id": "133", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the merchandise exports greater than 0.92 %?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1268, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "135": { + "question_id": "135", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of buss that are in front of the big yellow aeroplane less than the number of matte bicycles that are on the right side of the tiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "137": { + "question_id": "137", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) injective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 258, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "139": { + "question_id": "139", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Indigo have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 543, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "141": { + "question_id": "141", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()\nChoices:\n(A) 3.85\u7c73\n(B) 4.00\u7c73\n(C) 4.40\u7c73\n(D) 4.50\u7c73", + "choices": [ + "3.85\u7c73", + "4.00\u7c73", + "4.40\u7c73", + "4.50\u7c73" + ], + "answer": "4.40\u7c73", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.85\u7c73", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 78, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "143": { + "question_id": "143", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, CE bisects \u2220BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 6", + "choices": [ + "1", + "2", + "3", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 204, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "145": { + "question_id": "145", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Can you find the missing term?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "147": { + "question_id": "147", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample B\n(C) sample A", + "choices": [ + "neither; the samples have the same temperature", + "sample B", + "sample A" + ], + "answer": "sample B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 563, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "149": { + "question_id": "149", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u22201\uff1d50\u00b0\uff0c\u22202\uff1d75\u00b0\uff0c\u5219\u22203\uff1d\uff08\uff09\nChoices:\n(A) 55\u00b0\n(B) 60\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "55\u00b0", + "60\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 93, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "151": { + "question_id": "151", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the function reach its local maximum?\nChoices:\n(A) (u1, u2) = (0, 0)\n(B) (u1, u2) = (1, 0)\n(C) (u1, u2) = (0, 1)\n(D) (u1, u2) = (1, 1)", + "choices": [ + "(u1, u2) = (0, 0)", + "(u1, u2) = (1, 0)", + "(u1, u2) = (0, 1)", + "(u1, u2) = (1, 1)" + ], + "answer": "(u1, u2) = (0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(u1, u2) = (0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 458, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "153": { + "question_id": "153", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be impacted by an increase in owls?\nChoices:\n(A) sun\n(B) grasshoppers\n(C) grass\n(D) mice", + "choices": [ + "sun", + "grasshoppers", + "grass", + "mice" + ], + "answer": "mice", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "sun", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "155": { + "question_id": "155", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Green have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 601, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "157": { + "question_id": "157", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9335", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "159": { + "question_id": "159", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "100", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1000, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "161": { + "question_id": "161", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the two numbers visible in the picture?", + "choices": null, + "answer": "71", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "163": { + "question_id": "163", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "7519", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "165": { + "question_id": "165", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan rubber cylinders. Subtract all tiny shiny cubes. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "167": { + "question_id": "167", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the biggest zero of this function?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "169": { + "question_id": "169", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1049, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "171": { + "question_id": "171", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many cinnamon rolls are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 190, + "img_width": 467, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "173": { + "question_id": "173", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small rubber buss behind the big green road bike less than the number of suvs that are behind the large brown matte truck?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "175": { + "question_id": "175", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm liver for all the datasets?", + "choices": null, + "answer": "24", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "177": { + "question_id": "177", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown tandem bikes that are to the left of the small blue matte car greater than the number of tiny blue biplanes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "179": { + "question_id": "179", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u5df2\u77e5AC\uff1d4cm\uff0c\u82e5\u25b3ACD\u7684\u5468\u957f\u4e3a14cm\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 14cm\n(B) 28cm\n(C) 10cm\n(D) 20cm", + "choices": [ + "14cm", + "28cm", + "10cm", + "20cm" + ], + "answer": "20cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 157, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "181": { + "question_id": "181", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is correct?\nChoices:\n(A) A\n(B) B\n(C) C", + "choices": [ + "A", + "B", + "C" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 332, + "img_width": 864, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "183": { + "question_id": "183", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown cubes. Subtract all gray cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "185": { + "question_id": "185", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An image has the gray level PDF $p_r(r)$ shown in Fig. Q1a. One wants to do histogram specification SO that the processed image will have the specified $p_z(z)$ shown in Fig. Q1b. Can we use intensity mapping function $T: z=1-r$ to achieve the goal?\nChoices:\n(A) True\n(B) False", + "choices": [ + "True", + "False" + ], + "answer": "False", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "True", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 376, + "img_width": 724, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "187": { + "question_id": "187", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9015", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "189": { + "question_id": "189", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "191": { + "question_id": "191", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the volume of the air carriers in Ethiopia greater than the average volume of the air carriers in Ethiopia taken over all years ?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1116, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "193": { + "question_id": "193", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "195": { + "question_id": "195", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cC\uff0cD\u4e24\u70b9\u5728\u2299O\u4e0a\uff0c\u2220BCD\uff1d25\u00b0\uff0c\u5219\u2220AOD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 120\u00b0\n(B) 125\u00b0\n(C) 130\u00b0\n(D) 135\u00b0", + "choices": [ + "120\u00b0", + "125\u00b0", + "130\u00b0", + "135\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "120\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "197": { + "question_id": "197", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many sequences have negative Influence Scores?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 772, + "img_width": 1766, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "199": { + "question_id": "199", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure 23-42 is a section of a conducting rod of radius $R_1=1.30 \\mathrm{~mm}$ and length $L=$ $11.00 \\mathrm{~m}$ inside a thin-walled coaxial conducting cylindrical shell of radius $R_2=10.0 R_1$ and the (same) length $L$. The net charge on the rod is $Q_1=+3.40 \\times 10^{-12} \\mathrm{C}$; that on the shell is $Q_2=-2.00 Q_1$. What is the magnitude $E$ of the electric field at radial distance $r=2.00 R_2$?", + "choices": null, + "answer": "0.21", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 303, + "img_width": 262, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "201": { + "question_id": "201", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the border group?", + "choices": null, + "answer": "19", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "203": { + "question_id": "203", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57285\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u90fd\u662f1\uff0c\u25b3ABC\u7684\u9876\u70b9\u90fd\u5728\u8fd9\u4e9b\u5c0f\u6b63\u65b9\u5f62\u7684\u9876\u70b9\u4e0a\uff0c\u5219tan\u2220BAC\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8", + "choices": [ + "\\frac{4}{3}", + "0.75", + "0.6", + "0.8" + ], + "answer": "\\frac{4}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 151, + "img_width": 172, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "205": { + "question_id": "205", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A statistician analyzed the number of runs scored by players last season. How many players scored more than 2 runs last season?'", + "choices": null, + "answer": "24", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "207": { + "question_id": "207", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms magic and secure?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "209": { + "question_id": "209", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the highest value in black line chart ?", + "choices": null, + "answer": "28.3", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "211": { + "question_id": "211", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 2?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "213": { + "question_id": "213", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year there was lowest per capita real gross domestic product of ohio?", + "choices": null, + "answer": "2001", + "extraction": "1990", + "prediction": "1990", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "215": { + "question_id": "215", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Layla went on a camping trip and logged the number of miles she hiked each day. What is the range of the numbers?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 249, + "img_width": 212, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "217": { + "question_id": "217", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 202, + "img_width": 304, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "219": { + "question_id": "219", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "221": { + "question_id": "221", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, C are three points on \u2299O, \u2220ACB = 25.0, then the degree of \u2220BAO is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "223": { + "question_id": "223", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an even function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "225": { + "question_id": "225", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Fig. Q4 shows the contour of an object. Represent it with an 8-directional chain code. The resultant chain code should be normalized with respect to the starting point of the chain code. Represent the answer as a list with each digit as a element.", + "choices": null, + "answer": "[0, 2, 0, 2, 1, 7, 1, 2, 0, 3, 0, 6]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 560, + "img_width": 846, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "227": { + "question_id": "227", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 580, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "229": { + "question_id": "229", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest lysine level given?\nChoices:\n(A) 0.33%\n(B) 0.31%\n(C) 0.29%\n(D) 0.32%\n(E) 0.30%", + "choices": [ + "0.33%", + "0.31%", + "0.29%", + "0.32%", + "0.30%" + ], + "answer": "0.30%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.33%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2185, + "img_width": 1683, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "231": { + "question_id": "231", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the overall best ImageNet 10shot Accuracy score across different training steps?\nChoices:\n(A) Identity\n(B) Uniform\n(C) Uniform / Soft\n(D) Soft / Uniform\n(E) Soft\n(F) Dense", + "choices": [ + "Identity", + "Uniform", + "Uniform / Soft", + "Soft / Uniform", + "Soft", + "Dense" + ], + "answer": "Soft", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Identity", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 988, + "img_width": 2002, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "233": { + "question_id": "233", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "235": { + "question_id": "235", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the epigraph of a function f an infinite set?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 266, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "237": { + "question_id": "237", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Red squirrel and deer mouse population were to decrease, what would happen to the deer tick population?\nChoices:\n(A) increase\n(B) fluctuate\n(C) it would decrease\n(D) stay the same", + "choices": [ + "increase", + "fluctuate", + "it would decrease", + "stay the same " + ], + "answer": "it would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 346, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "239": { + "question_id": "239", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 769, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "241": { + "question_id": "241", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the table have a model size larger than 10B?", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1184, + "img_width": 1570, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "243": { + "question_id": "243", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0686\u0646\u062f \u0639\u062f\u062f \u0634\u06cc\u0631\u06cc\u0646\u06cc \u0645\u062b\u0644\u062b\u06cc \u0634\u06a9\u0644 \u062f\u0631 \u062c\u0639\u0628\u0647 \u0627\u0633\u062a\u061f", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1001, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "245": { + "question_id": "245", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Damon need to buy a grilled steak and a mushroom pizza? (Unit: $)", + "choices": null, + "answer": "24", + "extraction": "113", + "prediction": "113", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 259, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "247": { + "question_id": "247", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: A spaceship of mass $m=4.50 \\times 10^3 \\mathrm{~kg}$ is in a circular Earth orbit of radius $r=8.00 \\times 10^6 \\mathrm{~m}$ and period $T_0=118.6 \\mathrm{~min}=$ $7.119 \\times 10^3 \\mathrm{~s}$ when a thruster is fired in the forward direction to decrease the speed to $96.0 \\%$ of the original speed. What is the period $T$ of the resulting elliptical orbit (Figure)?", + "choices": null, + "answer": "6.36", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 906, + "img_width": 914, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "249": { + "question_id": "249", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green rubber cubes. Subtract all red matte blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "251": { + "question_id": "251", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green balls. Subtract all shiny things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "253": { + "question_id": "253", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "255": { + "question_id": "255", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2220BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of \u2220PAQ is ()\nChoices:\n(A) 70\u00b0\n(B) 55\u00b0\n(C) 40\u00b0\n(D) 30\u00b0", + "choices": [ + "70\u00b0", + "55\u00b0", + "40\u00b0", + "30\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "70\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 188, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "257": { + "question_id": "257", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u4ee5\u76f4\u89d2\u4e09\u89d2\u5f62\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u4e2d\u4e24\u4e2a\u6b63\u65b9\u5f62\u7684\u9762\u79ef\u5982\u56fe\u6240\u793a\uff0c\u5219\u6b63\u65b9\u5f62A\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 6\n(B) 36\n(C) 64\n(D) 8", + "choices": [ + "6", + "36", + "64", + "8" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "259": { + "question_id": "259", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow metal blocks. Subtract all gray metallic cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "261": { + "question_id": "261", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 345, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "263": { + "question_id": "263", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "38", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 113, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "265": { + "question_id": "265", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Justine's P.E. class participated in a push-up competition, and Justine wrote down how many push-ups each person could do. How many people did at least 60 push-ups? (Unit: people)", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 329, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "267": { + "question_id": "267", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape of a leaf is similar to Serrate, but has smaller, evenly-spaced teeth?\nChoices:\n(A) Undulate\n(B) Sinuate\n(C) Serrulate\n(D) Entire", + "choices": [ + "Undulate", + "Sinuate", + "Serrulate", + "Entire" + ], + "answer": "Serrulate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Undulate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 306, + "img_width": 529, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "269": { + "question_id": "269", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()\nChoices:\n(A) 160m\n(B) 160\u221a{3}m\n(C) (160-160\u221a{3})m\n(D) 360m", + "choices": [ + "160m", + "160\u221a{3}m", + "(160-160\u221a{3})m", + "360m" + ], + "answer": "160\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "160m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 159, + "img_width": 133, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "271": { + "question_id": "271", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y\nChoices:\n(A) 3\n(B) 4.5\n(C) 5\n(D) 6", + "choices": [ + "3", + "4.5", + "5", + "6" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 448, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "273": { + "question_id": "273", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One diagonal of a rhombus is twice as long as the other diagonal. If the area of the rhombus is 169 square millimeters, what are the lengths of the diagonals?\nChoices:\n(A) 6.5\n(B) 13\n(C) 26\n(D) 52", + "choices": [ + "6.5", + "13", + "26", + "52" + ], + "answer": "26", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "275": { + "question_id": "275", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, AD \u22a5 BC at D, DE \u22a5 AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()\nChoices:\n(A) \\frac{21}{2}\n(B) \\frac{\u221a{15}}{2}\n(C) \\frac{9}{2}\n(D) \\frac{3\u221a{5}}{2}", + "choices": [ + "\\frac{21}{2}", + "\\frac{\u221a{15}}{2}", + "\\frac{9}{2}", + "\\frac{3\u221a{5}}{2}" + ], + "answer": "\\frac{3\u221a{5}}{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{21}{2}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 185, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "277": { + "question_id": "277", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cube is identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 591, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "279": { + "question_id": "279", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be directly affected by a decrease in sunlight?\nChoices:\n(A) grass\n(B) mouse\n(C) grasshopper\n(D) owl", + "choices": [ + "grass", + "mouse", + "grasshopper", + "owl" + ], + "answer": "grass", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grass", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "281": { + "question_id": "281", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Was this a square pizza?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "283": { + "question_id": "283", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{WTY} \\cong \\overline{TWY}$. Find $x$.\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 10", + "choices": [ + "2", + "4", + "5", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 416, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "285": { + "question_id": "285", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that AB is the diameter of \u2299O, if the degree of \u2220BOC is 50.0, then the degree of \u2220A is ()\nChoices:\n(A) 50\u00b0\n(B) 40\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "50\u00b0", + "40\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "287": { + "question_id": "287", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is larger? R1 or R2?\nA. R1\nB. R2\nChoices:\n(A) R1\n(B) R2\n(C) R5\n(D) R3\n(E) R4", + "choices": [ + "R1", + "R2", + "R5", + "R3", + "R4" + ], + "answer": "R2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "R1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 370, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "289": { + "question_id": "289", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "291": { + "question_id": "291", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which period the number of full time employees is the maximum?\nChoices:\n(A) Jul '21\n(B) Jun '21\n(C) Mar '21\n(D) May '21\n(E) Apr '21", + "choices": [ + "Jul '21", + "Jun '21", + "Mar '21", + "May '21", + "Apr '21" + ], + "answer": "May '21", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Jul '21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "293": { + "question_id": "293", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, grasshopper population increase if\nChoices:\n(A) grouse decrease\n(B) chipmunk increases\n(C) grasses increases\n(D) elk increase", + "choices": [ + "grouse decrease", + "chipmunk increases", + "grasses increases", + "elk increase" + ], + "answer": "grasses increases", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grouse decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 156, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "295": { + "question_id": "295", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "297": { + "question_id": "297", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green buss greater than the number of blue school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "299": { + "question_id": "299", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1067, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "301": { + "question_id": "301", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model performs the best overall across the three stages in terms of Messenger training performance?\nChoices:\n(A) Dynalang\n(B) EMMA\n(C) R2D2\n(D) IMPALA", + "choices": [ + "Dynalang", + "EMMA", + "R2D2", + "IMPALA" + ], + "answer": "Dynalang", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Dynalang", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 524, + "img_width": 2012, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "303": { + "question_id": "303", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Dim Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 797, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "305": { + "question_id": "305", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "307": { + "question_id": "307", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure is an overhead view of the path taken by a race car driver as his car collides with the racetrack wall. Just before the collision, he is traveling at speed $v_i=70 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $30^{\\circ}$ from the wall. Just after the collision, he is traveling at speed $v_f=50 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $10^{\\circ}$ from the wall. His mass $m$ is $80 \\mathrm{~kg}$. The collision lasts for $14 \\mathrm{~ms}$. What is the magnitude of the average force on the driver during the collision?", + "choices": null, + "answer": "2.58", + "extraction": "110.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 466, + "img_width": 772, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "309": { + "question_id": "309", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The movie critic liked to count the number of actors in each movie he saw. How many movies had at least 30 actors but fewer than 47 actors? (Unit: movies)", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "311": { + "question_id": "311", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1947, + "img_width": 1620, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "313": { + "question_id": "313", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 334, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "315": { + "question_id": "315", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, angle A is congruent to angle BED, and angle C is congruent to angle D. If the ratio of the length of AB to the length of EB is 5:1, and the area of the triangle BED is 5*a^2 + 10, what is the area of triangle ABC?\nChoices:\n(A) 5*a^2 + 10\n(B) 25*a^2 + 50\n(C) 25*a^2 + 100\n(D) 125*a^2 + 250\n(E) cannot be determined", + "choices": [ + "5*a^2 + 10", + "25*a^2 + 50", + "25*a^2 + 100", + "125*a^2 + 250", + "cannot be determined" + ], + "answer": "125*a^2 + 250", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5*a^2 + 10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 463, + "img_width": 749, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "317": { + "question_id": "317", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 361, + "img_width": 496, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "319": { + "question_id": "319", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Would most of the ground cover be considered weeds?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "321": { + "question_id": "321", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $330, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "323": { + "question_id": "323", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Craig just downloaded the new game Gem Excavator on his phone. In the first level, Craig gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Craig's points. Which color gem affects Craig's points less?'\nChoices:\n(A) green\n(B) red", + "choices": [ + "green", + "red" + ], + "answer": "green", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 94, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "325": { + "question_id": "325", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "327": { + "question_id": "327", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 1 units in at least one store?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "329": { + "question_id": "329", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of y at x=6 is ____ that at x=8\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "larger than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "331": { + "question_id": "331", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Several people compared how many Web pages they had visited. What is the mean of the numbers?'", + "choices": null, + "answer": "64", + "extraction": "55", + "prediction": "55", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 246, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "333": { + "question_id": "333", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find tan X\nChoices:\n(A) \\frac { 5 } { 12 }\n(B) \\frac { 12 } { 13 }\n(C) \\frac { 17 } { 12 }\n(D) \\frac { 12 } { 5 }", + "choices": [ + "\\frac { 5 } { 12 }", + "\\frac { 12 } { 13 }", + "\\frac { 17 } { 12 }", + "\\frac { 12 } { 5 }" + ], + "answer": "\\frac { 5 } { 12 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 5 } { 12 }", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 297, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "335": { + "question_id": "335", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large brown matte balls. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "337": { + "question_id": "337", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "339": { + "question_id": "339", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u2299O\u4e2d\uff0cAB=AC\uff0c\u2220BAC\uff1d70\u00b0\uff0c\u5219\u2220AEC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 75\u00b0\n(C) 50\u00b0\n(D) 55\u00b0", + "choices": [ + "65\u00b0", + "75\u00b0", + "50\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 115, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "341": { + "question_id": "341", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is six (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "343": { + "question_id": "343", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple metallic spheres. Subtract all small purple things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "345": { + "question_id": "345", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many kites are there?", + "choices": null, + "answer": "25", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 429, + "img_width": 711, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "347": { + "question_id": "347", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green metallic double buss less than the number of big purple rubber cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "349": { + "question_id": "349", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which capability boasts the highest proportion (%)?\nChoices:\n(A) Rec\n(B) OCR\n(C) Know\n(D) Gen\n(E) Spat\n(F) Math", + "choices": [ + "Rec", + "OCR", + "Know", + "Gen", + "Spat", + "Math" + ], + "answer": "Rec", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rec", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 1348, + "img_width": 1704, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "351": { + "question_id": "351", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer purple rubber objects that are to the left of the red object than tiny matte bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "353": { + "question_id": "353", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: At time $t=0$ a tank contains $Q_0 \\mathrm{lb}$ of salt dissolved in 100 gal of water; see Figure 2.3.1. Assume that water containing $\\frac{1}{4} \\mathrm{lb}$ of salt/gal is entering the tank at a rate of $r \\mathrm{gal} / \\mathrm{min}$ and that the well-stirred mixture is draining from the tank at the same rate. Set up the initial value problem that describes this flow process. By finding the amount of salt $Q(t)$ in the tank at any time, and the limiting amount $Q_L$ that is present after a very long time, if $r=3$ and $Q_0=2 Q_L$, find the time $T$ after which the salt level is within $2 \\%$ of $Q_L$.", + "choices": null, + "answer": "130.4", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 938, + "img_width": 996, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "355": { + "question_id": "355", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If \u22201 = 50.0, then the degree of \u22202 is ()\nChoices:\n(A) 150\u00b0\n(B) 130\u00b0\n(C) 110\u00b0\n(D) 100\u00b0", + "choices": [ + "150\u00b0", + "130\u00b0", + "110\u00b0", + "100\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "150\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "357": { + "question_id": "357", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "359": { + "question_id": "359", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kylie spent a week at the beach and recorded the number of shells she found each day. According to the table, what was the rate of change between Thursday and Friday? (Unit: shells per day)", + "choices": null, + "answer": "-7", + "extraction": "-7", + "prediction": "-7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "361": { + "question_id": "361", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mold are the cylindrical ports located? \nChoices:\n(A) Upper half\n(B) Lower half\n(C) Medial half\n(D) Lateral half", + "choices": [ + "Upper half", + "Lower half", + "Medial half", + "Lateral half" + ], + "answer": "Lower half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Upper half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 435, + "img_width": 596, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "PMC-VQA", + "split": "testmini", + "task": "visual question answering" + }, + "363": { + "question_id": "363", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny gray metal blocks. Subtract all purple things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "365": { + "question_id": "365", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big yellow metallic spheres. Subtract all tiny metal things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "367": { + "question_id": "367", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 429, + "img_width": 873, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "369": { + "question_id": "369", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) surjective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 266, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "371": { + "question_id": "371", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ABC\uff1d90\u00b0\uff0c\u70b9D\u3001E\u3001F\u5206\u522b\u662f\u8fb9AB\u3001BC\u3001CA\u7684\u4e2d\u70b9\uff0c\u82e5DE+BF\uff1d8\uff0c\u5219BF\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "373": { + "question_id": "373", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the quadrilateral ABCD, \u2220BAD = 120.0, \u2220B = \u2220D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of \u25b3AMN is the smallest, then the degree of \u2220AMN + \u2220ANM is ()\nChoices:\n(A) 110\u00b0\n(B) 120\u00b0\n(C) 140\u00b0\n(D) 150\u00b0", + "choices": [ + "110\u00b0", + "120\u00b0", + "140\u00b0", + "150\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "110\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 122, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "375": { + "question_id": "375", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the length of $AC$ in the isosceles triangle ABC. \nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5", + "choices": [ + "1.5", + "7", + "11", + "12.5" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 293, + "img_width": 703, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "377": { + "question_id": "377", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 649, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "379": { + "question_id": "379", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown what will most directly be affected by the loss of the trees?\nChoices:\n(A) horses\n(B) cats\n(C) nothing\n(D) bears", + "choices": [ + "horses", + "cats", + "nothing", + "bears" + ], + "answer": "horses", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "horses", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 400, + "img_width": 570, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "381": { + "question_id": "381", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny cyan matte articulated buss left of the big school bus than small yellow matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "383": { + "question_id": "383", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What value you get , if you divide the largest bar value by 2 ?", + "choices": null, + "answer": "131253.5", + "extraction": "1000.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "385": { + "question_id": "385", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 771, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "387": { + "question_id": "387", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Of the four balls in the photo, what is the percentage of them on the ground?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 485, + "img_width": 363, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "389": { + "question_id": "389", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $320, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "shortage", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "391": { + "question_id": "391", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, point O is the center of \u2299O, points A, B, and C are on \u2299O, AO \u2225 BC, \u2220AOB = 40.0, then the degree of \u2220OAC is equal to ()\nChoices:\n(A) 40\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "60\u00b0", + "50\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 96, + "img_width": 96, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "393": { + "question_id": "393", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest dark blue bar?", + "choices": null, + "answer": "54", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "395": { + "question_id": "395", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average age of the people in this picture?", + "choices": null, + "answer": "10", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "397": { + "question_id": "397", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001B\u3001C\u90fd\u5728\u534a\u5f84\u4e3a2\u7684\u2299O\u4e0a\uff0c\u2220C\uff1d30\u00b0\uff0c\u5219\u5f26AB\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.2\n(D) 2.5", + "choices": [ + "1", + "2", + "2.2", + "2.5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 70, + "img_width": 73, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "399": { + "question_id": "399", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "401": { + "question_id": "401", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "403": { + "question_id": "403", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find TX if $E X=24$ and $D E=7$\nChoices:\n(A) 7\n(B) 24\n(C) 25\n(D) 32", + "choices": [ + "7", + "24", + "25", + "32" + ], + "answer": "32", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 221, + "img_width": 564, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "405": { + "question_id": "405", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "19", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1351, + "img_width": 1801, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "407": { + "question_id": "407", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9B\uff0cD\uff0cE\uff0cC\u5728\u540c\u4e00\u6761\u76f4\u7ebf\u4e0a\uff0c\u82e5\u25b3ABD\u224c\u25b3ACE\uff0c\u2220AEC\uff1d110\u00b0\uff0c\u5219\u2220DAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 67, + "img_width": 76, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "409": { + "question_id": "409", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the radius of this circle?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 358, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "411": { + "question_id": "411", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average percentage of population having access to electricity per year?", + "choices": null, + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1081, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "413": { + "question_id": "413", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5df2\u77e5\uff1a\u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAB\uff1dAC\uff0cBD\u4e3a\u2220ABC\u7684\u5e73\u5206\u7ebf\uff0c\u2220BDC\uff1d75\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 123, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "415": { + "question_id": "415", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average annual wage in Slovak Republic in the year 2019", + "choices": null, + "answer": "15017", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "417": { + "question_id": "417", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 748, + "img_width": 564, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "419": { + "question_id": "419", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "421": { + "question_id": "421", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An elevator cab of mass $m=500 \\mathrm{~kg}$ is descending with speed $v_i=4.0 \\mathrm{~m} / \\mathrm{s}$ when its supporting cable begins to slip, allowing it to fall with constant acceleration $\\vec{a}=\\vec{g} / 5$.\r\nDuring the $12 \\mathrm{~m}$ fall, what is the work $W_T$ done on the cab by the upward pull $\\vec{T}$ of the elevator cable?", + "choices": null, + "answer": "-47", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1190, + "img_width": 550, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "423": { + "question_id": "423", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink less than Dark Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 577, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "425": { + "question_id": "425", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5728Rt\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5AC\uff1d6\uff0cBC\uff1d8\uff0c\u5219cosA\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 0.6\n(B) 0.8\n(C) 0.75\n(D) \\frac{4}{3}", + "choices": [ + "0.6", + "0.8", + "0.75", + "\\frac{4}{3}" + ], + "answer": "0.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 171, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "427": { + "question_id": "427", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "429": { + "question_id": "429", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "431": { + "question_id": "431", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what would happen to dragonfly if all mayfly dies\nChoices:\n(A) remains the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remains the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 297, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "433": { + "question_id": "433", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 350, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "435": { + "question_id": "435", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of employed females who are not attending school greater than the average percentage of employed females who are not attending school taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 955, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "437": { + "question_id": "437", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fig.Q3 shows an excerpt of the transmission phase of a TCP connection. Assume the length of the IP header is 20 bytes. What is the ACK number at message 6?", + "choices": null, + "answer": "839", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 814, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "439": { + "question_id": "439", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is this function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 256, + "img_width": 539, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "441": { + "question_id": "441", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "443": { + "question_id": "443", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure: In Rt\u25b3ABC, \u2220C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}", + "choices": [ + "\\frac{3}{5}", + "\\frac{4}{5}", + "\\frac{3}{4}", + "\\frac{4}{3}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{3}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 80, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "445": { + "question_id": "445", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate less than Saddle Brown?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 436, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "447": { + "question_id": "447", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Midnight Blue intersect Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 685, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "449": { + "question_id": "449", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do the average motorcycle get on the highway?", + "choices": null, + "answer": "40", + "extraction": "40", + "prediction": "40", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "451": { + "question_id": "451", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow metallic choppers that are behind the large cyan thing less than the number of brown metal double buss that are behind the small yellow shiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "453": { + "question_id": "453", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 116, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "455": { + "question_id": "455", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If x = 32 and r = 18, what is the length of the arc shown in the figure above?\nChoices:\n(A) 16*\\pi/5\n(B) 32*\\pi/5\n(C) 36*\\pi\n(D) 288*\\pi/5\n(E) 576*\\pi", + "choices": [ + "16*\\pi/5", + "32*\\pi/5", + "36*\\pi", + "288*\\pi/5", + "576*\\pi" + ], + "answer": "16*\\pi/5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16*\\pi/5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 575, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "457": { + "question_id": "457", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "4525", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 605, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "459": { + "question_id": "459", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large cyan matte balls. Subtract all tiny shiny objects. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "461": { + "question_id": "461", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A perceptual audio codec is used to compress an audio signal. The codec groups every 4 barks into a subband and then allocates bits to different subbands according to the result of a spectrum analysis based on a psychoacoustic model. All samples in the same subband are quantized with the same quantizer, and the bit resolution of which is allocated by the codec. (The Bark scale is a psychoacoustical scale proposed by Eberhard Zwicker in 1961.) Fig. Q1a shows the frequency spectrum of a windowed segment of audio signal. The psychoacoustic model shown in Fig. Q1b is used in the audio codec to derive the masking threshold for the audio segment. How many potential maskers in Fig. Q1a?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 488, + "img_width": 908, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "463": { + "question_id": "463", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray things. Subtract all small brown metallic balls. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "465": { + "question_id": "465", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 628, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "467": { + "question_id": "467", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The degree measures of minor arc $\\widehat{A C}$ and major arc $\\widehat{A D C}$ are $x$ and $y$ respectively. If $m\u2220ABC = 70\u00b0$, find $x$.\nChoices:\n(A) 90\n(B) 100\n(C) 110\n(D) 120", + "choices": [ + "90", + "100", + "110", + "120" + ], + "answer": "110", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "90", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 235, + "img_width": 499, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "469": { + "question_id": "469", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Chartreuse?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "471": { + "question_id": "471", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Lily and her friends recorded their scores while playing a board game. Which score did the greatest number of people receive?'", + "choices": null, + "answer": "8", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "473": { + "question_id": "473", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2604, + "img_width": 2500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "475": { + "question_id": "475", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 71, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "477": { + "question_id": "477", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "479": { + "question_id": "479", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How many times Norway data bigger than Italy data ?", + "choices": null, + "answer": "2.54", + "extraction": "1.4", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "481": { + "question_id": "481", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 404, + "img_width": 592, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "483": { + "question_id": "483", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is on \u2299O, AE is the tangent of \u2299O, A is the tangent point, connect BC and extend to intersect AE at point D. If \u2220AOC = 80.0, then the degree of \u2220ADB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "20\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 165, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "485": { + "question_id": "485", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9D\u5728\u7b49\u8fb9\u25b3ABC\u7684\u8fb9CB\u7684\u5ef6\u957f\u7ebf\u4e0a\uff0c\u70b9E\u5728\u7ebf\u6bb5BC\u4e0a\uff0c\u8fde\u63a5AD\uff0cAE\uff0c\u82e5DA\uff1dDE\uff0c\u4e14\u2220DAB\uff1d20\u00b0\uff0c\u90a3\u4e48\u2220EAC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 15\u00b0\n(C) 10\u00b0\n(D) 5\u00b0", + "choices": [ + "20\u00b0", + "15\u00b0", + "10\u00b0", + "5\u00b0" + ], + "answer": "10\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 235, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "487": { + "question_id": "487", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big cars behind the small brown shiny mountain bike than tiny objects on the right side of the bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "489": { + "question_id": "489", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For trapezoid ABCD shown above, AB = 24, AD = 23, and BC = 16. What is the length of segment CD?", + "choices": null, + "answer": "25", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 297, + "img_width": 426, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "491": { + "question_id": "491", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 540, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "493": { + "question_id": "493", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function differentiable at every point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 847, + "img_width": 800, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "495": { + "question_id": "495", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer green things in front of the blue metallic car than choppers right of the chopper?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "497": { + "question_id": "497", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "499": { + "question_id": "499", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Quadrilateral $ABDC$ is a rectangle. If $m\\angle1 = 38$, find $m \\angle 2$\nChoices:\n(A) 33\n(B) 38\n(C) 52\n(D) 87", + "choices": [ + "33", + "38", + "52", + "87" + ], + "answer": "52", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 323, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "501": { + "question_id": "501", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red rubber cylinders. Subtract all blue objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "503": { + "question_id": "503", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the center person? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "40", + "prediction": "40", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 225, + "img_width": 338, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "505": { + "question_id": "505", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "507": { + "question_id": "507", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen if the hawk population increased?\nChoices:\n(A) mice would increase\n(B) sparrows increased\n(C) garter snakes would decrease\n(D) grass decreased", + "choices": [ + "mice would increase", + "sparrows increased", + "garter snakes would decrease", + "grass decreased" + ], + "answer": "garter snakes would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mice would increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "509": { + "question_id": "509", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 400, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "511": { + "question_id": "511", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "513": { + "question_id": "513", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest value in states that border West Virginia ?\nChoices:\n(A) 43.2%-63.6%\n(B) 45.2%-65.6%\n(C) 42.2%-62.6%\n(D) 41.2%-61.6%\n(E) 44.2%-64.6%", + "choices": [ + "43.2%-63.6%", + "45.2%-65.6%", + "42.2%-62.6%", + "41.2%-61.6%", + "44.2%-64.6%" + ], + "answer": "42.2%-62.6%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "43.2%-63.6%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "515": { + "question_id": "515", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You would potentially see a decrease in which organism if gulls disappeared?\nChoices:\n(A) herring\n(B) kril\n(C) anchovy\n(D) phytoplankton", + "choices": [ + "herring", + "kril", + "anchovy", + "phytoplankton" + ], + "answer": "kril", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "herring", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 549, + "img_width": 398, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "517": { + "question_id": "517", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: At Bloomington Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2014 and 2015? (Unit: employees per year)", + "choices": null, + "answer": "-1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 275, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "519": { + "question_id": "519", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many Triangles do you see in the picture?", + "choices": null, + "answer": "12", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 852, + "img_width": 948, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "521": { + "question_id": "521", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is a point on \u2299O, \u2220C = 20.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 20\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "20\u00b0", + "30\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 120, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "523": { + "question_id": "523", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, a teaching interest group wants to measure the height of a tree CD. They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()\nChoices:\n(A) 10m\n(B) 5m\n(C) 5\u221a{3}m\n(D) 10\u221a{3}m", + "choices": [ + "10m", + "5m", + "5\u221a{3}m", + "10\u221a{3}m" + ], + "answer": "5\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 285, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "525": { + "question_id": "525", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value shown on the X axis of first plot?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2209, + "img_width": 1711, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "527": { + "question_id": "527", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big shiny cars in front of the red airliner greater than the number of big purple road bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "529": { + "question_id": "529", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what number does the smaller arrow point to?", + "choices": null, + "answer": "1020", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "531": { + "question_id": "531", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to five.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "533": { + "question_id": "533", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small cyan cubes. Subtract all large yellow rubber cubes. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "535": { + "question_id": "535", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "-8", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "537": { + "question_id": "537", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of red rubber bicycles less than the number of cyan metal school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "539": { + "question_id": "539", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u3001E\u5206\u522b\u662f\u8fb9AB\u3001BC\u7684\u4e2d\u70b9\uff0c\u82e5\u25b3BDE\u7684\u5468\u957f\u662f6\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u662f\uff08\uff09\nChoices:\n(A) 8\n(B) 10\n(C) 12\n(D) 14", + "choices": [ + "8", + "10", + "12", + "14" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 71, + "img_width": 149, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "541": { + "question_id": "541", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is not identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "D", + "prediction": "D", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 560, + "img_width": 280, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "543": { + "question_id": "543", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small purple matte cars than brown matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "545": { + "question_id": "545", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Crimson?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 764, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "547": { + "question_id": "547", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which organisms will be most directly affected by a decrease in the amount of grass?\nChoices:\n(A) Insects\n(B) Hawk and snake\n(C) Snake and raccoon\n(D) Mouse and cricket", + "choices": [ + "Insects", + "Hawk and snake", + "Snake and raccoon", + "Mouse and cricket" + ], + "answer": "Insects", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Insects", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 377, + "img_width": 630, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "549": { + "question_id": "549", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangent to \u2299O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of \u25b3PCD is ()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) \u4e0d\u80fd\u786e\u5b9a", + "choices": [ + "8", + "12", + "16", + "\u4e0d\u80fd\u786e\u5b9a" + ], + "answer": "16", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 192, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "551": { + "question_id": "551", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest tattoos in male and the least in female?", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "553": { + "question_id": "553", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet less than Chocolate?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "555": { + "question_id": "555", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this nest larger than a fist?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "557": { + "question_id": "557", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220BAC\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5206\u522b\u5411\u5916\u4f5c\u7b49\u8fb9\u4e09\u89d2\u5f62\u25b3A'BC\uff0c\u25b3AB'C\uff0c\u25b3ABC'\uff0c\u82e5\u25b3A'BC\uff0c\u25b3AB'C\u7684\u9762\u79ef\u5206\u522b\u662f10\u548c4\uff0c\u5219\u25b3ABC'\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 9", + "choices": [ + "4", + "6", + "8", + "9" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 130, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "559": { + "question_id": "559", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest number shown on the black outer part of the watch?", + "choices": null, + "answer": "55", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "561": { + "question_id": "561", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber double buss right of the small red aeroplane the same as the number of small objects that are left of the tiny gray matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "563": { + "question_id": "563", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number on the monitor is higher?\nChoices:\n(A) top\n(B) bottom\n(C) left\n(D) right", + "choices": [ + "top", + "bottom", + "left", + "right" + ], + "answer": "bottom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "565": { + "question_id": "565", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model can achieve the best ImageNet 10-shot Accuracy score?\nChoices:\n(A) Soft MoE\n(B) Experts Choice\n(C) Tokens Choice\n(D) Dense", + "choices": [ + "Soft MoE", + "Experts Choice", + "Tokens Choice", + "Dense" + ], + "answer": "Soft MoE", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Soft MoE", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 978, + "img_width": 1966, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "567": { + "question_id": "567", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the slug to the nearest inch. The slug is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 252, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "569": { + "question_id": "569", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which subject had the highest pulse rate in baseline period?", + "choices": null, + "answer": "1", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2284, + "img_width": 1786, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "571": { + "question_id": "571", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 613, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "573": { + "question_id": "573", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A race car driver kept track of how many laps he drove in the past 5 days. What is the mode of the numbers?'", + "choices": null, + "answer": "53", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 203, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "575": { + "question_id": "575", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines $l$, $m$, and $n$ are perpendicular bisectors of $\\triangle PQR$ and meet at $T$. If $TQ = 2x$, $PT = 3y - 1$, and $TR = 8$, find $z$.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "577": { + "question_id": "577", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Consider the following matrices:\r\n$$\r\n\\mathbf{A}=\\left(\\begin{array}{rrr}\r\n1 & 2 & -1 \\\\\r\n0 & 3 & 1 \\\\\r\n2 & 0 & 1\r\n\\end{array}\\right), \\quad \\mathbf{B}=\\left(\\begin{array}{rrr}\r\n2 & 1 & 0 \\\\\r\n0 & -1 & 2 \\\\\r\n1 & 1 & 3\r\n\\end{array}\\right), \\quad \\mathbf{C}=\\left(\\begin{array}{ll}\r\n2 & 1 \\\\\r\n4 & 3 \\\\\r\n1 & 0\r\n\\end{array}\\right)\r\n$$\r\nFind $|\\mathbf{A B}|$.", + "choices": null, + "answer": "-104", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 142, + "img_width": 533, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "579": { + "question_id": "579", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of documents required per shipment to export goods in Uganda per year?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1228, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "581": { + "question_id": "581", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large matte cubes. Subtract all matte blocks. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "583": { + "question_id": "583", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth.\r\n\nChoices:\n(A) 5.8\n(B) 6.5\n(C) 14.2\n(D) 44.3", + "choices": [ + "5.8", + "6.5", + "14.2", + "44.3" + ], + "answer": "5.8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.8", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 465, + "img_width": 319, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "585": { + "question_id": "585", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u77e9\u5f62ABCD\u4e2d\uff0cAB\uff1d2\uff0c\u2220AOB\uff1d60\u00b0\uff0c\u5219BD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "4", + "3", + "2", + "2\u221a{3}" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "587": { + "question_id": "587", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()\nChoices:\n(A) 20\u6d77\u91cc\n(B) 20\u221a{2}\u6d77\u91cc\n(C) 15\u6d77\u91cc\n(D) 20\u6d77\u91cc", + "choices": [ + "20\u6d77\u91cc", + "20\u221a{2}\u6d77\u91cc", + "15\u6d77\u91cc", + "20\u6d77\u91cc" + ], + "answer": "20\u221a{2}\u6d77\u91cc", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u6d77\u91cc", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 124, + "img_width": 144, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "589": { + "question_id": "589", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number of things are either large objects behind the shiny double bus or tiny gray metal objects?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "591": { + "question_id": "591", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 600, + "img_width": 900, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "593": { + "question_id": "593", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average of longest light blue bar and shortest gray bar?", + "choices": null, + "answer": "273", + "extraction": "111", + "prediction": "111", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "595": { + "question_id": "595", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "597": { + "question_id": "597", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "599": { + "question_id": "599", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AC = 6 and BC = 3. Point P lies on line AB between A and B such that line CP is perpendicular to line AB. Which of the following could be the length of line CP?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 7\n(E) 8", + "choices": [ + "2", + "4", + "5", + "7", + "8" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 340, + "img_width": 393, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "601": { + "question_id": "601", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of smallest segment and second largest segment?", + "choices": null, + "answer": "0.33", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 386, + "img_width": 210, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "603": { + "question_id": "603", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP C\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "300", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "605": { + "question_id": "605", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large green matte cubes. Subtract all big green blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "607": { + "question_id": "607", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow shiny things. Subtract all yellow metal things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "609": { + "question_id": "609", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green matte cylinders. Subtract all big brown cubes. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "611": { + "question_id": "611", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A shipping company keeps track of the number of boxes in each shipment they send out. How many shipments had exactly 56 boxes? (Unit: shipments)", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 180, + "img_width": 153, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "613": { + "question_id": "613", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many houses are there?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 87, + "img_width": 473, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "615": { + "question_id": "615", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two sides of a triangle measure 12 and 7, which of the following cannot be the perimeter of the triangle?\nChoices:\n(A) 29\n(B) 34\n(C) 37\n(D) 38", + "choices": [ + "29", + "34", + "37", + "38" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "29", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 195, + "img_width": 522, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "617": { + "question_id": "617", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the normal components of $\\mathbf{a}$.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "619": { + "question_id": "619", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(4)?", + "choices": null, + "answer": "16", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 666, + "img_width": 970, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "621": { + "question_id": "621", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The figure above is composed of 25 small triangles that are congruent and equilateral. If the area of triangle DFH is 10, what is the area of triangle AFK?\nChoices:\n(A) 40\n(B) 42.5\n(C) 50\n(D) 52.5\n(E) 62.5", + "choices": [ + "40", + "42.5", + "50", + "52.5", + "62.5" + ], + "answer": "62.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 315, + "img_width": 397, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "623": { + "question_id": "623", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is twelve (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "625": { + "question_id": "625", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of blue matte school buss greater than the number of large cyan metallic jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "627": { + "question_id": "627", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends played a trivia game and recorded their scores. What is the mode of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 311, + "img_width": 155, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "629": { + "question_id": "629", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the object hut?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "631": { + "question_id": "631", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "633": { + "question_id": "633", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m\u22201 = 123$. Find the measure of $\\angle 14$.\nChoices:\n(A) 47\n(B) 57\n(C) 67\n(D) 123", + "choices": [ + "47", + "57", + "67", + "123" + ], + "answer": "57", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "47", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 330, + "img_width": 361, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "635": { + "question_id": "635", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, E is any point in \u25b1ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 179, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "637": { + "question_id": "637", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfa\u2225b\uff0c\u76f4\u7ebfa\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u5206\u522b\u4ea4\u4e8e\u70b9E\uff0cF\uff0c\u76f4\u7ebfb\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9CB\uff0cCD\u5206\u522b\u4ea4\u4e8e\u70b9G\uff0cH\uff0e\u82e5\u2220AFE\uff1d30\u00b0\uff0c\u5219\u2220DHG\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 100\u00b0\n(B) 110\u00b0\n(C) 120\u00b0\n(D) 130\u00b0", + "choices": [ + "100\u00b0", + "110\u00b0", + "120\u00b0", + "130\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "100\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 166, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "639": { + "question_id": "639", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What does the dial indicate as the top facing number?", + "choices": null, + "answer": "475", + "extraction": "450", + "prediction": "450", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VizWiz", + "split": "testmini", + "task": "visual question answering" + }, + "641": { + "question_id": "641", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: The graph of the concentration function $c(t)$ is shown after a 7-mg injection of dye into a heart. Use Simpson's Rule to estimate the cardiac output.", + "choices": null, + "answer": "5.77", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 420, + "img_width": 828, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "643": { + "question_id": "643", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is the diameter of \u2299O, chord DE \u2225 OA, if the degree of \u2220D is 50.0, then the degree of \u2220C is ()\nChoices:\n(A) 25\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 50\u00b0", + "choices": [ + "25\u00b0", + "30\u00b0", + "40\u00b0", + "50\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 111, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "645": { + "question_id": "645", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAC\uff0cBD\u662f\u83f1\u5f62ABCD\u7684\u5bf9\u89d2\u7ebf\uff0cBH\u22a5AD\u4e8e\u70b9H\uff0c\u82e5AC\uff1d4\uff0cBD\uff1d3\uff0c\u5219BH\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 2.4\n(B) 2.5\n(C) 4.8\n(D) 5", + "choices": [ + "2.4", + "2.5", + "4.8", + "5" + ], + "answer": "2.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 139, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "647": { + "question_id": "647", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the top view.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "B", + "extraction": "B", + "prediction": "B", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 900, + "img_width": 600, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "649": { + "question_id": "649", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many values are below 30 in Mainly are incidents of individual misconduct?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 461, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "651": { + "question_id": "651", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an assignment, Johnny looked at which countries got the most Nobel Prizes in various decades. In the 1990s, how many more Nobel Prize winners did Canada have than Italy? (Unit: Nobel Prize winners)", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 224, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "653": { + "question_id": "653", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there at least three distinct shades of blue in this photo?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "655": { + "question_id": "655", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Russia has the highest transport?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 507, + "img_width": 858, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "657": { + "question_id": "657", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Arkansas have a higher value than Indiana ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "659": { + "question_id": "659", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value of navy blue bar?", + "choices": null, + "answer": "991", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "661": { + "question_id": "661", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1274, + "img_width": 1732, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "663": { + "question_id": "663", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "665": { + "question_id": "665", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $h$ in the triangle.\nChoices:\n(A) 4.62\n(B) 5.66\n(C) 6.93\n(D) 8", + "choices": [ + "4.62", + "5.66", + "6.93", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.62", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 275, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "667": { + "question_id": "667", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has the least difference between the used and new cars?", + "choices": null, + "answer": "2015", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "669": { + "question_id": "669", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 3.5", + "choices": [ + "2", + "2.5", + "3", + "3.5" + ], + "answer": "3.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 18, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "671": { + "question_id": "671", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the semicircle rounded to 2 decimal places?", + "choices": null, + "answer": "14.14", + "extraction": "1.57", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "673": { + "question_id": "673", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large green cars less than the number of brown rubber double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "675": { + "question_id": "675", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()\nChoices:\n(A) 13m\n(B) 34m\n(C) (6+14\u221a{3})m\n(D) 40m", + "choices": [ + "13m", + "34m", + "(6+14\u221a{3})m", + "40m" + ], + "answer": "34m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "13m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 83, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "677": { + "question_id": "677", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of dirtbikes right of the large blue object less than the number of small green metallic cars in front of the tiny matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "679": { + "question_id": "679", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of \u25b3OAB is ()\nChoices:\n(A) 15\n(B) 17\n(C) 21\n(D) 27", + "choices": [ + "15", + "17", + "21", + "27" + ], + "answer": "17", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 73, + "img_width": 173, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "681": { + "question_id": "681", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the largest city in the nation where this plane is headquartered?\nChoices:\n(A) hong kong\n(B) osaka\n(C) shanghai\n(D) tokyo", + "choices": [ + "hong kong", + "osaka", + "shanghai", + "tokyo" + ], + "answer": "tokyo", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "hong kong", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "683": { + "question_id": "683", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 157, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "685": { + "question_id": "685", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to organism c if organism b increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't predict\n(D) stay same", + "choices": [ + "decrease", + "increase", + "can't predict", + "stay same" + ], + "answer": "increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 246, + "img_width": 574, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "687": { + "question_id": "687", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen that would increase the number of krill?\nChoices:\n(A) increase in phytoplankton\n(B) decrease in penguins\n(C) increase in fish\n(D) increase in birds", + "choices": [ + "increase in phytoplankton", + "decrease in penguins", + "increase in fish", + "increase in birds" + ], + "answer": "increase in phytoplankton", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase in phytoplankton", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 396, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "689": { + "question_id": "689", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are these people sitting in a circle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "691": { + "question_id": "691", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "256", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 500, + "img_width": 596, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "693": { + "question_id": "693", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the orange larger than the car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "695": { + "question_id": "695", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Orchid?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 734, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "697": { + "question_id": "697", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, \u2220B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()\nChoices:\n(A) 12\n(B) 18\n(C) 27\n(D) 54", + "choices": [ + "12", + "18", + "27", + "54" + ], + "answer": "27", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 205, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "699": { + "question_id": "699", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2684, + "img_width": 4577, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "701": { + "question_id": "701", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 109, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "703": { + "question_id": "703", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of highest value and lowest value of navy blue bar?", + "choices": null, + "answer": "2372.1", + "extraction": "1000.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "705": { + "question_id": "705", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the heart wider than more than half the width of the thorax?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 512, + "img_width": 419, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "707": { + "question_id": "707", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0ca\u2225b\uff0c\u22201\uff1d60\u00b0\uff0c\u5219\u22202\u7684\u5927\u5c0f\u662f\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 120\u00b0", + "choices": [ + "60\u00b0", + "80\u00b0", + "100\u00b0", + "120\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "709": { + "question_id": "709", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "711": { + "question_id": "711", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 270, + "img_width": 369, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "713": { + "question_id": "713", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 7", + "choices": [ + "3", + "4", + "6", + "7" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 422, + "img_width": 521, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "715": { + "question_id": "715", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "717": { + "question_id": "717", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is \\int_1^{\\infty} {1\\over x^{0.99}} dx finite according to this graph ?\n\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 314, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "719": { + "question_id": "719", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Brenda graphed the daily low temperature for 5 days. What is the range of the numbers?'", + "choices": null, + "answer": "13", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "721": { + "question_id": "721", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many odd functions are in the graph?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 297, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "723": { + "question_id": "723", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 277, + "img_width": 468, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "725": { + "question_id": "725", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In Figure, suppose that Barbara's velocity relative to Alex is a constant $v_{B A}=52 \\mathrm{~km} / \\mathrm{h}$ and car $P$ is moving in the negative direction of the $x$ axis.\r\n(a) If Alex measures a constant $v_{P A}=-78 \\mathrm{~km} / \\mathrm{h}$ for car $P$, what velocity $v_{P B}$ will Barbara measure?", + "choices": null, + "answer": "-130", + "extraction": "-26", + "prediction": "-26", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 690, + "img_width": 976, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "727": { + "question_id": "727", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the largest and the smallest value in the chart?", + "choices": null, + "answer": "70", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "729": { + "question_id": "729", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "731": { + "question_id": "731", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The train conductor made sure to count the number of passengers on each train. What is the smallest number of passengers? (Unit: passengers)", + "choices": null, + "answer": "40", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 159, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "733": { + "question_id": "733", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Square ABCD. CT: tangent to semicircle. Find the angle \u2220CTD. Return the numeric value.", + "choices": null, + "answer": "63.4", + "extraction": "135.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1018, + "img_width": 972, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "735": { + "question_id": "735", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan things in front of the cyan rubber suv less than the number of big suvs that are behind the red bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "737": { + "question_id": "737", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 32\n(B) 39\n(C) 46\n(D) 78", + "choices": [ + "32", + "39", + "46", + "78" + ], + "answer": "78", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "739": { + "question_id": "739", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hannah need to buy a baking dish and a cookie jar? (Unit: $)", + "choices": null, + "answer": "23", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 201, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "741": { + "question_id": "741", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1080, + "img_width": 1920, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "743": { + "question_id": "743", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the different between the highest unemployment rate and the lowest?", + "choices": null, + "answer": "10.53", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "745": { + "question_id": "745", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2832, + "img_width": 4256, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "747": { + "question_id": "747", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot M$, $FL=24,HJ=48$, and $m \\widehat {HP}=65$. Find $m \\widehat {HJ}$.\nChoices:\n(A) 65\n(B) 120\n(C) 130\n(D) 155", + "choices": [ + "65", + "120", + "130", + "155" + ], + "answer": "130", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 467, + "img_width": 507, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "749": { + "question_id": "749", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()\nChoices:\n(A) \\frac{15}{4}cm\n(B) \\frac{20}{3}cm\n(C) \\frac{15}{7}cm\n(D) \\frac{20}{7}cm", + "choices": [ + "\\frac{15}{4}cm", + "\\frac{20}{3}cm", + "\\frac{15}{7}cm", + "\\frac{20}{7}cm" + ], + "answer": "\\frac{20}{7}cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{15}{4}cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 181, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "751": { + "question_id": "751", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely happen if Artemia was removed?\nChoices:\n(A) Seahorses would decrease\n(B) Rotifers would decrease\n(C) Mysids would decrease\n(D) Algae would decrease", + "choices": [ + "Seahorses would decrease", + "Rotifers would decrease", + "Mysids would decrease", + "Algae would decrease" + ], + "answer": "Seahorses would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Seahorses would decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 363, + "img_width": 862, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "753": { + "question_id": "753", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "755": { + "question_id": "755", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a polynomial", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "757": { + "question_id": "757", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 7.2\n(B) 8\n(C) 12\n(D) 15", + "choices": [ + "7.2", + "8", + "12", + "15" + ], + "answer": "7.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 220, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "759": { + "question_id": "759", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 201, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "761": { + "question_id": "761", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the crayfish population if the Largemouth Bass and Northern Pike populations decrease?\nChoices:\n(A) Nothing\n(B) Decrease\n(C) Slightly Decrease\n(D) Increase", + "choices": [ + "Nothing", + "Decrease", + "Slightly Decrease", + "Increase" + ], + "answer": "Increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Nothing", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 319, + "img_width": 405, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "763": { + "question_id": "763", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny shiny balls. Subtract all purple objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "765": { + "question_id": "765", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "767": { + "question_id": "767", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of y?", + "choices": null, + "answer": "5", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 429, + "img_width": 483, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "769": { + "question_id": "769", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each blue ball represents one particle of solute. Which solution has a higher concentration of blue particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B", + "choices": [ + "neither; their concentrations are the same", + "Solution A", + "Solution B" + ], + "answer": "Solution A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "771": { + "question_id": "771", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram of a food chain below and on your knowledge of science. If the population of snakes increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) None", + "choices": [ + "decrease", + "remain the same", + "increase", + "None" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "773": { + "question_id": "773", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point D is on the extended line of AB, passing point D is the tangent of \u2299O, and the tangent point is C, if \u2220A = 25.0, then \u2220D = ()\nChoices:\n(A) 25\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 65\u00b0", + "choices": [ + "25\u00b0", + "40\u00b0", + "50\u00b0", + "65\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 163, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "775": { + "question_id": "775", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 724, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "777": { + "question_id": "777", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In rhombus LMPQ, $m \\angle Q L M=2 x^{2}-10$, $m \\angle Q P M=8 x$, and $M P=10$ . \r\nFind the perimeter of $LMPQ$\nChoices:\n(A) 10\n(B) 40\n(C) 70\n(D) 140", + "choices": [ + "10", + "40", + "70", + "140" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 177, + "img_width": 337, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "779": { + "question_id": "779", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the cardiac silhouette less than half the diameter of the diaphragm?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 841, + "img_width": 1023, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "781": { + "question_id": "781", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\triangle CDF$, $K$ is the centroid and $DK=16$. Find $CD$.\nChoices:\n(A) 9\n(B) 12\n(C) 18\n(D) 18", + "choices": [ + "9", + "12", + "18", + "18" + ], + "answer": "18", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 540, + "img_width": 461, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "783": { + "question_id": "783", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In order to measure the width of parallel river AB, \u2220ACB = 30.0, \u2220ADB = 60.0, CD = 60.0, then the width of the river AB is ()\nChoices:\n(A) 30m\n(B) 30\u221a{3}m\n(C) (30\u221a{3}+30)m\n(D) (30\u221a{3}-30)m", + "choices": [ + "30m", + "30\u221a{3}m", + "(30\u221a{3}+30)m", + "(30\u221a{3}-30)m" + ], + "answer": "30\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "785": { + "question_id": "785", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Part of an ecosystem is shown in this diagram. Imagine the algae and floating plants are prevented from growing. How will that most likely affect this ecosystem?\nChoices:\n(A) The number of ducks will increase\n(B) The number of minnows will increase\n(C) There will be no effect on this ecosystem\n(D) The number of aquatic crustaceans will decrease", + "choices": [ + "The number of ducks will increase", + "The number of minnows will increase", + "There will be no effect on this ecosystem", + "The number of aquatic crustaceans will decrease" + ], + "answer": "The number of aquatic crustaceans will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The number of ducks will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "787": { + "question_id": "787", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of the zebra's stripes are horizontal?", + "choices": null, + "answer": "50", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "789": { + "question_id": "789", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the values of posse and mortar?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "791": { + "question_id": "791", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Given $V_s$ = 5V, $R_1$ = 1k\u03a9, $R_2$ = 2.2k\u03a9, $R_3$ = 2.2k\u03a9, $R_4$ = 1.5k\u03a9, and $R_L$ = 4.7k\u03a9. Determine the voltage and current across $R_L$. Answer in unit of V (3 sig.fig.).", + "choices": null, + "answer": "1.06", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 400, + "img_width": 444, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "793": { + "question_id": "793", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest Elo score for the agent using an offline RL algorithm?", + "choices": null, + "answer": "1578", + "extraction": "179", + "prediction": "179", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1922, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "795": { + "question_id": "795", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "75", + "extraction": "30", + "prediction": "30", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 601, + "img_width": 475, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "797": { + "question_id": "797", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing pattern in the picture?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 291, + "img_width": 386, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "799": { + "question_id": "799", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Ruth need to buy a baking dish, a casserole dish, and an ice cream scoop? (Unit: $)", + "choices": null, + "answer": "13", + "extraction": "13", + "prediction": "13", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 229, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "801": { + "question_id": "801", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?'", + "choices": null, + "answer": "10", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 272, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "803": { + "question_id": "803", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "805": { + "question_id": "805", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the donut more than half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 434, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "807": { + "question_id": "807", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaf shapes would have the least amount of wind resistance and water loss?\nChoices:\n(A) Truncate\n(B) Acuminate\n(C) Rounded\n(D) Sagittate", + "choices": [ + "Truncate", + "Acuminate", + "Rounded", + "Sagittate" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Truncate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 508, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "809": { + "question_id": "809", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a group of horses, some individuals have a black coat and others have a reddish-brown coat. In this group, the gene for the coat color trait has two alleles. The allele for a black coat (L) is dominant over the allele for a reddish-brown coat (l).\nThis Punnett square shows a cross between two horses. What is the expected ratio of offspring with a reddish-brown coat to offspring with a black coat? Choose the most likely ratio.\nChoices:\n(A) 1:3\n(B) 4:0\n(C) 3:1\n(D) 0:4\n(E) 2:2", + "choices": [ + "1:3", + "4:0", + "3:1", + "0:4", + "2:2" + ], + "answer": "2:2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1:3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 241, + "img_width": 233, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "811": { + "question_id": "811", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A machine at the candy factory dispensed different numbers of lemon-flavored candies into various bags. What is the smallest number of lemon-flavored candies? (Unit: lemon-flavored candies)", + "choices": null, + "answer": "34", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "813": { + "question_id": "813", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value on the X axis?", + "choices": null, + "answer": "30", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2264, + "img_width": 1768, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "815": { + "question_id": "815", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle N C L$\nChoices:\n(A) 60\n(B) 120\n(C) 240\n(D) 360", + "choices": [ + "60", + "120", + "240", + "360" + ], + "answer": "120", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 279, + "img_width": 367, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "817": { + "question_id": "817", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight line a \u2225 b, the point B is on the straight line b, and AB \u22a5 BC, \u22202 = 65.0, then the degree of \u22201 is ()\nChoices:\n(A) 65\u00b0\n(B) 25\u00b0\n(C) 35\u00b0\n(D) 45\u00b0", + "choices": [ + "65\u00b0", + "25\u00b0", + "35\u00b0", + "45\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 171, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "819": { + "question_id": "819", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of $t$ in the parallelogram.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 13", + "choices": [ + "6", + "7", + "8", + "13" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 400, + "img_width": 428, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "821": { + "question_id": "821", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most of the people young men?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 360, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "823": { + "question_id": "823", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You can see how organisms are interconnected from the diagram given. What will be the effect if all the Killer whales are removed?\nChoices:\n(A) The population of tuna will increase\n(B) Mouse will decrease in number\n(C) The phytoplankton will decrease\n(D) The grasshopper will die", + "choices": [ + "The population of tuna will increase", + "Mouse will decrease in number", + "The phytoplankton will decrease", + "The grasshopper will die" + ], + "answer": "The population of tuna will increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of tuna will increase", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "825": { + "question_id": "825", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metallic road bikes that are behind the large bus less than the number of small matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "827": { + "question_id": "827", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1138, + "img_width": 828, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "829": { + "question_id": "829", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which matchstick needs to be moved in order to create a square?\nChoices:\n(A) Top\n(B) Bottom\n(C) Left\n(D) Right\n(E) Not possible", + "choices": [ + "Top", + "Bottom", + "Left", + "Right", + "Not possible" + ], + "answer": "Left", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 396, + "img_width": 378, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "831": { + "question_id": "831", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An author recorded how many words she wrote in the past 3 days. How many words in total did the author write on Thursday and Friday? (Unit: words)", + "choices": null, + "answer": "679", + "extraction": "635", + "prediction": "635", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 236, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "833": { + "question_id": "833", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Phenylalanine (Phe, 5) is a naturally occurring amino acid. What is the energy of interaction between its phenyl group and the electric dipole moment of a neighbouring peptide group? Take the distance between the groups as $4.0 \\mathrm{~nm}$ and treat the phenyl group as a benzene molecule. The magnitude of the dipole moment of the peptide group is $\\mu=1.3 \\mathrm{D}$ and the polarizability volume of benzene is $\\alpha^{\\prime}=1.04 \\times 10^{-29} \\mathrm{~m}^3$.", + "choices": null, + "answer": "-4.3", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 372, + "img_width": 474, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "835": { + "question_id": "835", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of people are wearing blue?", + "choices": null, + "answer": "0", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "837": { + "question_id": "837", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red motorbikes than big red choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "839": { + "question_id": "839", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many years have value less than 10%?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "841": { + "question_id": "841", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends compared the sizes of their stuffed animal collections. What is the median of the numbers?'", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 265, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "843": { + "question_id": "843", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Red?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 752, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "845": { + "question_id": "845", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 390, + "img_width": 550, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "847": { + "question_id": "847", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which function grows the fastest as x increases?\nChoices:\n(A) red\n(B) purple\n(C) blue", + "choices": [ + "red", + "purple", + "blue" + ], + "answer": "red", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "red", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1294, + "img_width": 1706, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "849": { + "question_id": "849", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The 4 8x8 images shown below are encoded with JPEG coding. Based on their expected DCT (Discrete Cosine Transform) coefficients, Which image has the most non-zero AC coefficients? (a): Image A, (b): Image B, (c): Image C, (d): Image D.\nChoices:\n(A) (c)\n(B) (d)\n(C) (a)\n(D) (b)\n(E) (e)", + "choices": [ + "(c)", + "(d)", + "(a)", + "(b)", + "(e)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 282, + "img_width": 940, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "851": { + "question_id": "851", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the net concessional disbursements from imf greater than 32000000 US$?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1139, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "853": { + "question_id": "853", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, \u2220BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12", + "choices": [ + "3", + "6", + "9", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "855": { + "question_id": "855", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$ so that $a \u2225 b$.\nChoices:\n(A) 2.5\n(B) 14\n(C) 15\n(D) 16", + "choices": [ + "2.5", + "14", + "15", + "16" + ], + "answer": "14", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 536, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "857": { + "question_id": "857", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "859": { + "question_id": "859", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "27", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 603, + "img_width": 750, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "861": { + "question_id": "861", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 680, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "863": { + "question_id": "863", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rhode Island have the lowest value in the USA ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "865": { + "question_id": "865", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Hot Pink have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 512, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "867": { + "question_id": "867", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A food industry researcher compiled the revenues of several pizzerias. How much did Dan's Deep Dish make from pizza sales? (Unit: $)", + "choices": null, + "answer": "22", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 465, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "869": { + "question_id": "869", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow matte cubes. Subtract all metal things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "871": { + "question_id": "871", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 200, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "873": { + "question_id": "873", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 40?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "875": { + "question_id": "875", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow things. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "877": { + "question_id": "877", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms squad and warm?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "879": { + "question_id": "879", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray rubber things. Subtract all small blue spheres. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "881": { + "question_id": "881", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the population of grasshopper decreases, the population of mouse will most likely do what?\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remain the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "883": { + "question_id": "883", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "15", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 207, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "885": { + "question_id": "885", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Grayson counted the number of pieces of pepperoni on each pizza he made. What is the smallest number of pieces of pepperoni? (Unit: pieces of pepperoni)", + "choices": null, + "answer": "18", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "887": { + "question_id": "887", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O. If \u2220ABC = 70.0, then the degree of \u2220AOC is equal to ()\nChoices:\n(A) 140\u00b0\n(B) 130\u00b0\n(C) 120\u00b0\n(D) 110\u00b0", + "choices": [ + "140\u00b0", + "130\u00b0", + "120\u00b0", + "110\u00b0" + ], + "answer": "140\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "140\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 106, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "889": { + "question_id": "889", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 472, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "891": { + "question_id": "891", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "893": { + "question_id": "893", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the blue function as x approaches negative infinity?", + "choices": null, + "answer": "0", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "895": { + "question_id": "895", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the lowest Audio-Audio Similarity and Text-Audio Similarity scores overall?\nChoices:\n(A) MusicLDM (mix-up)\n(B) MusicLDM (original)\n(C) MusicLDM (BLM)\n(D) MusicLDM (BAM)\n(E) MuBERT", + "choices": [ + "MusicLDM (mix-up)", + "MusicLDM (original)", + "MusicLDM (BLM)", + "MusicLDM (BAM)", + "MuBERT" + ], + "answer": "MuBERT", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "MusicLDM (mix-up)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "violin plot", + "grade": "college", + "img_height": 682, + "img_width": 1882, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "897": { + "question_id": "897", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a calculator to find the measure of $\u2220J$ to the nearest degree.\nChoices:\n(A) 33\n(B) 40\n(C) 50\n(D) 57", + "choices": [ + "33", + "40", + "50", + "57" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 223, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "899": { + "question_id": "899", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number comes next?", + "choices": null, + "answer": "2123", + "extraction": "1357", + "prediction": "1357", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 185, + "img_width": 406, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "901": { + "question_id": "901", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all shiny spheres. Subtract all big red matte spheres. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "903": { + "question_id": "903", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if \u2220ABC = 30.0, then the degree of \u2220AOC is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "905": { + "question_id": "905", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large red cars behind the metal car less than the number of blue matte tandem bikes that are behind the big blue rubber utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "907": { + "question_id": "907", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When the military expenditure value was lower than 0.2%?", + "choices": null, + "answer": "1970", + "extraction": "1970", + "prediction": "1970", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "909": { + "question_id": "909", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AD = 1.0, DB = 2.0, then the value of \\frac ADAB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{1}{4}\n(C) \\frac{1}{3}\n(D) \\frac{1}{2}", + "choices": [ + "\\frac{2}{3}", + "\\frac{1}{4}", + "\\frac{1}{3}", + "\\frac{1}{2}" + ], + "answer": "\\frac{1}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 132, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "911": { + "question_id": "911", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the smaller picture below the larger picture?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "913": { + "question_id": "913", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 763, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "915": { + "question_id": "915", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the Lion population if the Gum Tree population decreased?\nChoices:\n(A) Unable to determine.\n(B) Nothing would happen.\n(C) It would also decrease.\n(D) It would increase.", + "choices": [ + "Unable to determine.", + "Nothing would happen.", + "It would also decrease.", + "It would increase." + ], + "answer": "It would also decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Unable to determine.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 740, + "img_width": 528, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "917": { + "question_id": "917", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of the number of procedures to register a business in 2004 to that in 2007?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 939, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "919": { + "question_id": "919", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold more than 3 units in at least one store?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "921": { + "question_id": "921", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 5\n(B) 8.1\n(C) 10.3\n(D) 21.6", + "choices": [ + "5", + "8.1", + "10.3", + "21.6" + ], + "answer": "21.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 170, + "img_width": 226, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "923": { + "question_id": "923", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model achieves the highest score in terms of Rec?\nChoices:\n(A) Transformers Agent (GPT-4)\n(B) LLaMA-Adapter v2-7B\n(C) LLaVA-7B\n(D) Otter-9B \n(E) MM-ReAct-GPT-3.5\n(F) LLaVA-13B (LLaMA-2)\n(G) MM-ReAct-GPT-4", + "choices": [ + "Transformers Agent (GPT-4)", + "LLaMA-Adapter v2-7B", + "LLaVA-7B", + "Otter-9B ", + "MM-ReAct-GPT-3.5", + "LLaVA-13B (LLaMA-2)", + "MM-ReAct-GPT-4" + ], + "answer": "LLaVA-13B (LLaMA-2)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Transformers Agent (GPT-4)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1910, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "925": { + "question_id": "925", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Haley went to the store. She bought 3+9/10 pounds of pumpernickel bread crumbs. How much did she spend? (Unit: $)", + "choices": null, + "answer": "19.5", + "extraction": "15.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 130, + "img_width": 334, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "927": { + "question_id": "927", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAB\u7684\u5782\u76f4\u5e73\u5206\u7ebf\u4ea4AB\u4e8e\u70b9D\uff0c\u4ea4BC\u4e8e\u70b9E\uff0c\u8fde\u63a5AE\uff0e\u82e5AB\uff1d6\uff0c\u25b3ACE\u7684\u5468\u957f\u4e3a13\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 19\n(B) 16\n(C) 29\n(D) 18", + "choices": [ + "19", + "16", + "29", + "18" + ], + "answer": "19", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "19", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 152, + "img_width": 199, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "929": { + "question_id": "929", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Tim need to buy a mystery game and a toy rocket? (Unit: $)", + "choices": null, + "answer": "85", + "extraction": "32", + "prediction": "32", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 226, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "931": { + "question_id": "931", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O, AB is the diameter of \u2299O, point D is a point on \u2299O, if \u2220ACD = 40.0, then the size of \u2220BAD is ()\nChoices:\n(A) 35\u00b0\n(B) 50\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "35\u00b0", + "50\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "933": { + "question_id": "933", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hector need to buy a European vacation package and an Australian vacation package? (Unit: $)", + "choices": null, + "answer": "9606", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 344, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "935": { + "question_id": "935", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAD\uff1d6\uff0cAB\uff1d4\uff0cDE\u5e73\u5206\u2220ADC\u4ea4BC\u4e8e\u70b9E\uff0c\u5219BE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 140, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "937": { + "question_id": "937", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "939": { + "question_id": "939", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be most affected if the clams all died?\nChoices:\n(A) squid\n(B) lantern fish\n(C) octopus\n(D) sea horse", + "choices": [ + "squid", + "lantern fish", + "octopus", + "sea horse" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "squid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "941": { + "question_id": "941", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which is the next number in the series?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 327, + "img_width": 271, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "943": { + "question_id": "943", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1258, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "945": { + "question_id": "945", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 451, + "img_width": 610, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "947": { + "question_id": "947", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBC\u2225DE\uff0c\u2220A\uff1d45\u00b0\uff0c\u2220C\uff1d110\u00b0\uff0c\u5219\u2220AED\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 95\u00b0\n(B) 105\u00b0\n(C) 115\u00b0\n(D) 125\u00b0", + "choices": [ + "95\u00b0", + "105\u00b0", + "115\u00b0", + "125\u00b0" + ], + "answer": "115\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "95\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "949": { + "question_id": "949", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the combined percentage of Lowest ROI and Medium ROI in SEO?", + "choices": null, + "answer": "56", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "951": { + "question_id": "951", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 10.25\n(B) 12.75\n(C) 18.75\n(D) 25.5", + "choices": [ + "10.25", + "12.75", + "18.75", + "25.5" + ], + "answer": "12.75", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10.25", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 427, + "img_width": 487, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "953": { + "question_id": "953", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of trees have leaves?", + "choices": null, + "answer": "50", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "955": { + "question_id": "955", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0e\u70b9O\u662f\u6b63\u4e94\u8fb9\u5f62ABCDE\u7684\u4e2d\u5fc3\uff0c\u2299O\u662f\u6b63\u4e94\u8fb9\u5f62\u7684\u5916\u63a5\u5706\uff0c\u2220ADE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 32\u00b0\n(C) 36\u00b0\n(D) 40\u00b0", + "choices": [ + "30\u00b0", + "32\u00b0", + "36\u00b0", + "40\u00b0" + ], + "answer": "36\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 136, + "img_width": 136, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "957": { + "question_id": "957", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big brown buss behind the gray matte aeroplane greater than the number of yellow shiny scooters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "959": { + "question_id": "959", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The teachers at an elementary school counted how many desks they had in their classrooms. What is the median of the numbers?'", + "choices": null, + "answer": "32", + "extraction": "35", + "prediction": "35", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "961": { + "question_id": "961", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value in blue bar?", + "choices": null, + "answer": "7", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "963": { + "question_id": "963", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For what x does f reach its local maximum?", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 397, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "965": { + "question_id": "965", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: whats the lowest number yard line that you can see?", + "choices": null, + "answer": "30", + "extraction": "30", + "prediction": "30", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 690, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "967": { + "question_id": "967", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the amount earned from national visitors greater than the average amount earned from national visitors taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1146, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "969": { + "question_id": "969", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "971": { + "question_id": "971", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the boy reach the highest book?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "973": { + "question_id": "973", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many zeros does this function have?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "975": { + "question_id": "975", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte objects. Subtract all blue metallic objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "977": { + "question_id": "977", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5AB\u2225CD\uff0cAF\u4e0eCD\u4ea4\u4e8e\u70b9E\uff0cBE\u22a5AF\uff0c\u2220B\uff1d65\u00b0\uff0c\u5219\u2220DEF\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 5\u00b0\n(C) 15\u00b0\n(D) 25\u00b0", + "choices": [ + "65\u00b0", + "5\u00b0", + "15\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "979": { + "question_id": "979", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9079", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "981": { + "question_id": "981", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of 2002, 2003 and 2004?", + "choices": null, + "answer": "70.4", + "extraction": "6006.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "983": { + "question_id": "983", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest accuracy reported in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "985": { + "question_id": "985", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the smallest percentage value recorded in the chart?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "987": { + "question_id": "987", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A cross-section of an airplane wing is shown. Measurements of the thickness of the wing, in centimeters, at 20-centimeter intervals are 5.8, 20.3, 26.7, 29.0, 27.6, 27.3, 23.8, 20.5, 15.1, 8.7, and 2.8. Use the Midpoint Rule to estimate the area of the wing's cross-section.", + "choices": null, + "answer": "4232", + "extraction": "140", + "prediction": "140", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 338, + "img_width": 946, + "language": "english", + "skills": [ + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "989": { + "question_id": "989", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "991": { + "question_id": "991", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which functions have no zeros?\nChoices:\n(A) green and red\n(B) blue and orange\n(C) purple and grey", + "choices": [ + "green and red", + "blue and orange", + "purple and grey" + ], + "answer": "purple and grey", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green and red", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 383, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "993": { + "question_id": "993", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny yellow matte cylinders. Subtract all small blue rubber spheres. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "995": { + "question_id": "995", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the sum of the two bars in 18 to 21?", + "choices": null, + "answer": "18.27", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "997": { + "question_id": "997", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is kx^2/2 larger than E at x=0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 197, + "img_width": 537, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "999": { + "question_id": "999", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the tangential components of $\\mathbf{a}$.", + "choices": null, + "answer": "4.5", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "2": { + "question_id": "2", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the total volume of the measuring cup? (Unit: g)", + "choices": null, + "answer": "1000", + "extraction": "1000", + "prediction": "1000", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "4": { + "question_id": "4", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large rubber spheres. Subtract all big shiny cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "6": { + "question_id": "6", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()\nChoices:\n(A) 6cm\n(B) 7cm\n(C) 4cm\n(D) 5cm", + "choices": [ + "6cm", + "7cm", + "4cm", + "5cm" + ], + "answer": "6cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 30, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "8": { + "question_id": "8", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny gray bicycles that are on the left side of the brown metal sedan greater than the number of things that are to the left of the tiny green bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "10": { + "question_id": "10", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "E", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 418, + "img_width": 376, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "12": { + "question_id": "12", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer metallic fighters than rubber objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "14": { + "question_id": "14", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "16": { + "question_id": "16", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 5 units in at least one store?", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "18": { + "question_id": "18", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The passage below describes an experiment. Read the passage and then follow the instructions below.\n\nLinda applied a thin layer of wax to the underside of her snowboard and rode the board straight down a hill. Then, she removed the wax and rode the snowboard straight down the hill again. She repeated the rides four more times, alternating whether she rode with a thin layer of wax on the board or not. Her friend Bob timed each ride. Linda and Bob calculated the average time it took to slide straight down the hill on the snowboard with wax compared to the average time on the snowboard without wax.\nFigure: snowboarding down a hill. Identify the question that Linda and Bob's experiment can best answer.\nChoices:\n(A) Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?\n(B) Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "choices": [ + "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?" + ], + "answer": "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "elementary school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "20": { + "question_id": "20", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "22": { + "question_id": "22", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 785, + "img_width": 555, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "24": { + "question_id": "24", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 709, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "26": { + "question_id": "26", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black greater than Deep Sky Blue?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 761, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "28": { + "question_id": "28", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{AB}$ is a diameter, $AC=8$ inches, and $BC=15$ inches. Find the radius of the circle.\nChoices:\n(A) 7.5\n(B) 8\n(C) 8.5\n(D) 17", + "choices": [ + "7.5", + "8", + "8.5", + "17" + ], + "answer": "8.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 431, + "img_width": 519, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "30": { + "question_id": "30", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the two chords AB and CD in the circle intersect at E, \u2220D = 35.0, \u2220AEC = 105.0, then \u2220C = ()\nChoices:\n(A) 60\u00b0\n(B) 70\u00b0\n(C) 80\u00b0\n(D) 85\u00b0", + "choices": [ + "60\u00b0", + "70\u00b0", + "80\u00b0", + "85\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "32": { + "question_id": "32", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1dAC\uff0c\u2220CAB\uff1d40\u00b0\uff0c\u5219\u2220D\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 168, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "34": { + "question_id": "34", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous at each point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "36": { + "question_id": "36", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "38": { + "question_id": "38", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 6?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "40": { + "question_id": "40", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown blocks. Subtract all large blue rubber things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "42": { + "question_id": "42", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 539, + "img_width": 401, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "44": { + "question_id": "44", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend? (Unit: $)", + "choices": null, + "answer": "18", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 226, + "img_width": 305, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "46": { + "question_id": "46", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the population of adult spiders if predator ate all the spider eggs?\nChoices:\n(A) Adult spider population would remain the same\n(B) Adult spider population would double.\n(C) Adults spider population would decrease\n(D) Adult spider population would increase.", + "choices": [ + "Adult spider population would remain the same", + "Adult spider population would double.", + "Adults spider population would decrease", + "Adult spider population would increase." + ], + "answer": "Adults spider population would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Adult spider population would remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 829, + "img_width": 1024, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "48": { + "question_id": "48", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle 3$.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62", + "choices": [ + "28", + "38", + "52", + "62" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 426, + "img_width": 596, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "50": { + "question_id": "50", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what would likely happen if the number of large roach would decrease?\nChoices:\n(A) The population of steelheads would decrease.\n(B) The population of stickleback fry would increase.\n(C) The population of predatory insects would increase.\n(D) The population of predatory insects would decrease.", + "choices": [ + "The population of steelheads would decrease.", + "The population of stickleback fry would increase.", + "The population of predatory insects would increase.", + "The population of predatory insects would decrease." + ], + "answer": "The population of predatory insects would decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of steelheads would decrease.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 600, + "img_width": 633, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "52": { + "question_id": "52", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red metallic spheres. Subtract all big brown matte things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "54": { + "question_id": "54", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, the ratio of the length of line AB to the length of line AC is 2 : 5. If AC = 25, what is the length of line AB?\nChoices:\n(A) 8\n(B) 10\n(C) 15\n(D) 18\n(E) 20", + "choices": [ + "8", + "10", + "15", + "18", + "20" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "56": { + "question_id": "56", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "6", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 295, + "img_width": 202, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "58": { + "question_id": "58", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 760, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "60": { + "question_id": "60", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "40", + "prediction": "40", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 381, + "img_width": 477, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "62": { + "question_id": "62", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cE\uff0cF\u5206\u522b\u662f\u83f1\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u7684\u4e2d\u70b9\uff0c\u4e14AB\uff1d5\uff0cAC\uff1d6\uff0e\u5219EF\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 6", + "choices": [ + "4", + "5", + "5.5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 138, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "64": { + "question_id": "64", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample A\n(C) sample B", + "choices": [ + "neither; the samples have the same temperature", + "sample A", + "sample B" + ], + "answer": "sample A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 405, + "img_width": 550, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "66": { + "question_id": "66", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "A", + "extraction": "A", + "prediction": "A", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 562, + "img_width": 320, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "68": { + "question_id": "68", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5c06\u4e00\u6839\u957f\u5ea6\u4e3a16cm\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53476cm\u81f3D\u70b9\uff08\u5982\u56fe\uff09\uff0c\u5219\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 8cm", + "choices": [ + "2cm", + "4cm", + "6cm", + "8cm" + ], + "answer": "4cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 252, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "70": { + "question_id": "70", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2600, + "img_width": 2266, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "72": { + "question_id": "72", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A real estate agent drove around the neighborhood and counted the number of houses on each block. How many blocks have exactly 36 houses? (Unit: blocks)", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "74": { + "question_id": "74", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the difference of largest and smallest bar?", + "choices": null, + "answer": "47.6", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "76": { + "question_id": "76", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to fish if pelicans increase?\nChoices:\n(A) decrease\n(B) nothing\n(C) increase\n(D) none of the above", + "choices": [ + "decrease", + "nothing", + "increase", + "none of the above" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "78": { + "question_id": "78", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the missing value.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 394, + "img_width": 1062, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "80": { + "question_id": "80", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what will happen if all the algae died due to pesticides?\nChoices:\n(A) Crabs and limpets will decrease\n(B) Dolphins will increase\n(C) Sea gulls will become extinct\n(D) Star fish will increase", + "choices": [ + "Crabs and limpets will decrease", + "Dolphins will increase", + "Sea gulls will become extinct", + "Star fish will increase" + ], + "answer": "Crabs and limpets will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Crabs and limpets will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 199, + "img_width": 372, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "82": { + "question_id": "82", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A square is inscribed in a circle of area 18$\\pi$ square units. Find the length of a side of the square.\nChoices:\n(A) 3\n(B) 3 \\sqrt 2\n(C) 6\n(D) 6 \\sqrt 2", + "choices": [ + "3", + "3 \\sqrt 2", + "6", + "6 \\sqrt 2" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 202, + "img_width": 200, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "84": { + "question_id": "84", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: ABCD is a square. Inscribed Circle center is O. Find the the angle of \u2220AMK. Return the numeric value.", + "choices": null, + "answer": "130.9", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1220, + "img_width": 1194, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "86": { + "question_id": "86", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the highest Acc score when Pretrain Loss is equal to 1.80?\nChoices:\n(A) ICL\n(B) SFT\n(C) SFT 1/8\n(D) RFT k=100\n(E) RFT k=25\n(F) RET k=6\n(G) RFT U13B", + "choices": [ + "ICL", + "SFT", + "SFT 1/8", + "RFT k=100", + "RFT k=25", + "RET k=6", + "RFT U13B" + ], + "answer": "RFT U13B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ICL", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 1046, + "img_width": 1734, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "88": { + "question_id": "88", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A square is tangent to a line at point P in the figure above. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 277, + "img_width": 442, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "90": { + "question_id": "90", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow matte blocks. Subtract all tiny brown cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "92": { + "question_id": "92", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the windows have a geometric shape that most houses have?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "94": { + "question_id": "94", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cD\u4e3a\u25b3ABC\u5185\u4e00\u70b9\uff0cCD\u5e73\u5206\u2220ACB\uff0cBD\u22a5CD\uff0c\u2220A\uff1d\u2220ABD\uff0c\u82e5\u2220DBC\uff1d54\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 44\u00b0\n(C) 27\u00b0\n(D) 54\u00b0", + "choices": [ + "36\u00b0", + "44\u00b0", + "27\u00b0", + "54\u00b0" + ], + "answer": "27\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 74, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "96": { + "question_id": "96", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: How many times Dissatisfied more than satisfied?", + "choices": null, + "answer": "3.9", + "extraction": "2.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 328, + "img_width": 186, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "98": { + "question_id": "98", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the value of the square in the figure.", + "choices": null, + "answer": "2", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning", + "algebraic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "100": { + "question_id": "100", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the ruling group?", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "102": { + "question_id": "102", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The shape is made of unit squares. What is the area of the shape?", + "choices": null, + "answer": "6", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 106, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "104": { + "question_id": "104", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?", + "choices": null, + "answer": "0.8", + "extraction": "0.8", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "106": { + "question_id": "106", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 1?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "108": { + "question_id": "108", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Find out the average of the bottom two countries ??", + "choices": null, + "answer": "51.04", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "110": { + "question_id": "110", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of two lowest bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "112": { + "question_id": "112", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan airliners less than the number of gray shiny utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "114": { + "question_id": "114", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, KL is tangent to $\\odot M$ at K. Find the value of x.\nChoices:\n(A) 6.00\n(B) 9.45\n(C) 18.9\n(D) 37.8", + "choices": [ + "6.00", + "9.45", + "18.9", + "37.8" + ], + "answer": "9.45", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.00", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 273, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "116": { + "question_id": "116", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf has the most veins?\nChoices:\n(A) Acuminate\n(B) Truncate\n(C) Mucronate\n(D) Acute", + "choices": [ + "Acuminate", + "Truncate", + "Mucronate", + "Acute" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Acuminate", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 187, + "img_width": 350, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "118": { + "question_id": "118", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of this function?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "120": { + "question_id": "120", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 320, + "img_width": 312, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "122": { + "question_id": "122", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow regular buss than small yellow metallic school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "124": { + "question_id": "124", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of leaf arrangement consists of at least three leaves attached to a node.\nChoices:\n(A) Whorled\n(B) Simple\n(C) Opposite\n(D) Alternate", + "choices": [ + "Whorled", + "Simple", + "Opposite", + "Alternate" + ], + "answer": "Whorled", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Whorled", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "126": { + "question_id": "126", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 800, + "img_width": 623, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "128": { + "question_id": "128", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large metal blocks. Subtract all yellow cylinders. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "130": { + "question_id": "130", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1403, + "img_width": 1063, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "132": { + "question_id": "132", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57284\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u5747\u4e3a1\uff0c\u70b9A\uff0cB\uff0cC\u90fd\u5728\u683c\u70b9\u4e0a\uff0cAD\u22a5BC\u4e8eD\uff0c\u5219AD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) \\frac{7}{3}", + "choices": [ + "1", + "1.5", + "2", + "\\frac{7}{3}" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "134": { + "question_id": "134", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People can use the engineering-design process to develop solutions to problems. One step in the process is testing if a potential solution meets the requirements of the design.\nThe passage below describes how the engineering-design process was used to test a solution to a problem. Read the passage. Then answer the question below.\n\nCooper was a landscape architect who was hired to design a new city park. The city council wanted the park to have space for outdoor concerts and to have at least 20% of the park shaded by trees. Cooper thought the concert area should be at least 150 meters from the road so traffic noise didn't interrupt the music. He developed three possible designs for the park with the concert area in a different location in each design. Then, he tested each design by measuring the distance between the road and the concert area.\nFigure: studying an architect's design. Which of the following could Cooper's test show?\nChoices:\n(A) if at least 20% of the park would be shaded by trees in each design\n(B) which design would have the least traffic noise in the concert area\n(C) which design would have the greatest distance between the concert area and the road", + "choices": [ + "if at least 20% of the park would be shaded by trees in each design", + "which design would have the least traffic noise in the concert area", + "which design would have the greatest distance between the concert area and the road" + ], + "answer": "which design would have the greatest distance between the concert area and the road", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "if at least 20% of the park would be shaded by trees in each design", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "high school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "136": { + "question_id": "136", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest value of blue bar?", + "choices": null, + "answer": "64", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 443, + "img_width": 415, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "138": { + "question_id": "138", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sandwich cut in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "140": { + "question_id": "140", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which food has the least carbs?\nChoices:\n(A) soup\n(B) water\n(C) sandwich\n(D) buns", + "choices": [ + "soup", + "water", + "sandwich", + "buns" + ], + "answer": "soup", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "soup", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "142": { + "question_id": "142", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it split in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 425, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "144": { + "question_id": "144", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Natalie buys 4.6 kilograms of turmeric. What is the total cost? (Unit: $)", + "choices": null, + "answer": "13.8", + "extraction": "18.4", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 162, + "img_width": 210, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "146": { + "question_id": "146", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kimberly's classmates revealed how many science articles they read. What is the range of the numbers?'", + "choices": null, + "answer": "4", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 286, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "148": { + "question_id": "148", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the smallest base?\nChoices:\n(A) Hastate\n(B) Cordate\n(C) Sagittate\n(D) Decurrent", + "choices": [ + "Hastate", + "Cordate", + "Sagittate", + "Decurrent" + ], + "answer": "Decurrent", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Hastate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 161, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "150": { + "question_id": "150", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are three points on \u2299O, and the straight line CD and \u2299O are tangent to point C. If \u2220DCB = 40.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 80\u00b0\n(D) 100\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "80\u00b0", + "100\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 144, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "152": { + "question_id": "152", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u5c06\u542b30\u00b0\u89d2\u7684\u76f4\u89d2\u4e09\u89d2\u677f\u6309\u5982\u56fe\u65b9\u5f0f\u653e\u7f6e\uff0c\u76f4\u89d2\u9876\u70b9\u5728l2\u4e0a\uff0c\u82e5\u22201\uff1d76\u00b0\uff0c\u5219\u22202\uff1d\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 45\u00b0\n(C) 44\u00b0\n(D) 64\u00b0", + "choices": [ + "36\u00b0", + "45\u00b0", + "44\u00b0", + "64\u00b0" + ], + "answer": "44\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 208, + "img_width": 229, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "154": { + "question_id": "154", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an odd function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "156": { + "question_id": "156", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the as x approaches 1 from the left side?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "158": { + "question_id": "158", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 685, + "img_width": 911, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "160": { + "question_id": "160", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 270, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "162": { + "question_id": "162", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The bird watcher counted the number of birds in each flock that passed overhead. How many flocks had at least 17 birds but fewer than 33 birds? (Unit: flocks)", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 202, + "img_width": 117, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "164": { + "question_id": "164", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, CE \u22a5 AB, point E is the foot of perpendicular, if \u2220D = 55.0, then \u2220BCE = ()\nChoices:\n(A) 55\u00b0\n(B) 35\u00b0\n(C) 25\u00b0\n(D) 30\u00b0", + "choices": [ + "55\u00b0", + "35\u00b0", + "25\u00b0", + "30\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 161, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "166": { + "question_id": "166", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Shape is missing?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "B", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 816, + "img_width": 2028, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "168": { + "question_id": "168", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the Hue-Saturation subspace shown in Fig. Q2 is a perfect circle and that colors A, B and C can be represented as the 3 points shown in the subspace. Which color has the smallest saturation coefficient?\nChoices:\n(A) (c)\n(B) (a)\n(C) (e)\n(D) (d)\n(E) (b)", + "choices": [ + "(c)", + "(a)", + "(e)", + "(d)", + "(b)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 454, + "img_width": 414, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "170": { + "question_id": "170", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: f(-1) is ____ f(0).\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "172": { + "question_id": "172", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Dark Salmon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 524, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "174": { + "question_id": "174", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny cyan suvs that are behind the aeroplane than cyan utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "176": { + "question_id": "176", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle.\nChoices:\n(A) 0.5\n(B) 1\n(C) 1.5\n(D) 2", + "choices": [ + "0.5", + "1", + "1.5", + "2" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 292, + "img_width": 305, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "178": { + "question_id": "178", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u5728\u2220FBD\u7684\u4e24\u6761\u8fb9BF\u3001BD\u4e0a\uff0cBE\u5e73\u5206\u2220FBD\uff0cCE\u5e73\u5206\u2220ACD\uff0c\u8fde\u63a5AE\uff0c\u82e5\u2220BEC\uff1d35\u00b0\uff0c\u5219\u2220FAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 45\u00b0\n(C) 55\u00b0\n(D) 65\u00b0", + "choices": [ + "35\u00b0", + "45\u00b0", + "55\u00b0", + "65\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 99, + "img_width": 129, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "180": { + "question_id": "180", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny brown cylinders. Subtract all tiny brown objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "182": { + "question_id": "182", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Yellow?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 589, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "184": { + "question_id": "184", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 0?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "186": { + "question_id": "186", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC \u22a5 CD, BD \u22a5 CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()\nChoices:\n(A) \\frac{20}{3}\n(B) \\frac{10}{3}\n(C) 7\n(D) \\frac{14}{3}", + "choices": [ + "\\frac{20}{3}", + "\\frac{10}{3}", + "7", + "\\frac{14}{3}" + ], + "answer": "\\frac{20}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{20}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "188": { + "question_id": "188", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many methods in the table achieve an A-847 score higher than 20.0?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 634, + "img_width": 2226, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "190": { + "question_id": "190", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 132, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "192": { + "question_id": "192", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the diameter CD of \u2299O crosses the midpoint G of chord EF, \u2220DCF = 20.0, then \u2220EOD is equal to ()\nChoices:\n(A) 10\u00b0\n(B) 20\u00b0\n(C) 40\u00b0\n(D) 80\u00b0", + "choices": [ + "10\u00b0", + "20\u00b0", + "40\u00b0", + "80\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 127, + "img_width": 101, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "194": { + "question_id": "194", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: On average, how many people can commute on this vehicle?", + "choices": null, + "answer": "50", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 408, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "196": { + "question_id": "196", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u6240\u793a\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u5df2\u77e5\u70b9D\uff0cE\uff0cF\u5206\u522b\u4e3a\u8fb9BC\uff0cAD\uff0cCE\u7684\u4e2d\u70b9\uff0c\u4e14S\u25b3ABC\uff1d4cm2\uff0c\u5219S\u25b3DEF\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 2cm2\n(B) 1cm2\n(C) 0.5cm2\n(D) 0.25cm2", + "choices": [ + "2cm2", + "1cm2", + "0.5cm2", + "0.25cm2" + ], + "answer": "0.5cm2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "198": { + "question_id": "198", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the missing value.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 756, + "img_width": 890, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "200": { + "question_id": "200", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 404, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "202": { + "question_id": "202", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "204": { + "question_id": "204", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0627\u0632 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u062a\u0635\u0648\u06cc\u0631 \u062f\u0631\u0628 \u062f\u0648\u0645 \u0686\u0646\u062f \u0634\u06cc\u0634\u0647 \u0628\u062f\u0648\u0646 \u0631\u0646\u06af \u062f\u0627\u0631\u0647\u061f", + "choices": null, + "answer": "12", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 376, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "206": { + "question_id": "206", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the scale factor from $Q$ to $Q'$.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 611, + "img_width": 731, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "208": { + "question_id": "208", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 195, + "img_width": 300, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "210": { + "question_id": "210", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 370, + "img_width": 493, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "212": { + "question_id": "212", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 403, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "214": { + "question_id": "214", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of amount earned from merchandise imports in Canada greater than the average percentage of amount earned from merchandise imports in Canada taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1109, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "216": { + "question_id": "216", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people like the most preferred object in the whole chart?", + "choices": null, + "answer": "90", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "218": { + "question_id": "218", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large red rubber blocks. Subtract all tiny red matte objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "220": { + "question_id": "220", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2299O is the circumscribed circle of the quadrilateral ABCD, if \u2220O = 110.0, then the degree of \u2220C is ()\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 105\u00b0\n(D) 90\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "105\u00b0", + "90\u00b0" + ], + "answer": "125\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "222": { + "question_id": "222", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue shiny spheres. Subtract all big blue shiny cubes. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "224": { + "question_id": "224", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "226": { + "question_id": "226", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "228": { + "question_id": "228", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If \u2220ACE = 25.0, then the degree of \u2220D is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 137, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "230": { + "question_id": "230", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy higher than 9 in at least one dataset?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "232": { + "question_id": "232", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each pink ball represents one particle of solute. Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A", + "choices": [ + "neither; their concentrations are the same", + "Solution B", + "Solution A" + ], + "answer": "Solution B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "234": { + "question_id": "234", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown above, AC = 6. What is the length of segment AB?\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 7\n(E) It cannot be determined from the information given", + "choices": [ + "3", + "5", + "6", + "7", + "It cannot be determined from the information given" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 378, + "img_width": 434, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "236": { + "question_id": "236", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $z$.\nChoices:\n(A) 7\n(B) 9\n(C) 12\n(D) 15", + "choices": [ + "7", + "9", + "12", + "15" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 423, + "img_width": 447, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "238": { + "question_id": "238", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find PT\nChoices:\n(A) 6\n(B) \\frac { 20 } { 3 }\n(C) 7\n(D) 22 / 3", + "choices": [ + "6", + "\\frac { 20 } { 3 }", + "7", + "22 / 3" + ], + "answer": "\\frac { 20 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "240": { + "question_id": "240", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2387, + "img_width": 3500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "242": { + "question_id": "242", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle A$ of quadrilateral ABCD\nChoices:\n(A) 45\n(B) 90\n(C) 135\n(D) 180", + "choices": [ + "45", + "90", + "135", + "180" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 381, + "img_width": 621, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "244": { + "question_id": "244", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 500, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "246": { + "question_id": "246", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. In 1995, the Intergovernmental Panel on Climate Change (IPCC) considered a global average temperature rise of $1.0-3.5^{\\circ} \\mathrm{C}$ likely by the year 2100 , with $2.0^{\\circ} \\mathrm{C}$ its best estimate. Because water vapour is itself a greenhouse gas, the increase in water vapour content of the atmosphere is of some concern to climate change experts. Predict the relative increase in water vapour in the atmosphere based on a temperature rises of $2.0 \\mathrm{~K}$, assuming that the relative humidity remains constant. (The present global mean temperature is $290 \\mathrm{~K}$, and the equilibrium vapour pressure of water at that temperature is 0.0189 bar.)", + "choices": null, + "answer": "13", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 216, + "img_width": 1098, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "248": { + "question_id": "248", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green matte choppers greater than the number of large yellow shiny motorbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "250": { + "question_id": "250", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area $A$ of the shaded region is given. Find $x$. $A = 66$ cm$^2$ .\nChoices:\n(A) 4.6\n(B) 6.5\n(C) 13.0\n(D) 26.0", + "choices": [ + "4.6", + "6.5", + "13.0", + "26.0" + ], + "answer": "13.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 286, + "img_width": 303, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "252": { + "question_id": "252", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Consider the infinitely long chain of resistors shown below. What is the resistance between terminals a and b if R=1?", + "choices": null, + "answer": "0.73", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 169, + "img_width": 463, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "254": { + "question_id": "254", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big objects that are in front of the metal fighter less than the number of things that are behind the big metallic bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "256": { + "question_id": "256", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAD\u5e73\u5206\u2220BAC\uff0cAD\u4ea4BC\u4e8e\u70b9D\uff0cDE\u22a5AB\uff0c\u5782\u8db3\u4e3aE\uff0c\u82e5DE\uff1d3\uff0cAC\uff1d4\uff0c\u5219\u25b3ADC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "258": { + "question_id": "258", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An employee at the craft store counted the number of red buttons in each bag of mixed buttons. How many bags had at least 60 red buttons but fewer than 81 red buttons?'", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 156, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "260": { + "question_id": "260", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the derivative of the function positive between [1, 2] assuming that it's differentiable?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 368, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "262": { + "question_id": "262", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between genres of tv shows watched by highest female and lowest female?", + "choices": null, + "answer": "39", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 756, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "264": { + "question_id": "264", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For Group C, in which week is the cumulative increase in weight , the highest?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "266": { + "question_id": "266", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has the most uneven shape?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) truncate", + "choices": [ + "oblique", + "obtuse", + "cordate", + "truncate" + ], + "answer": "oblique", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "oblique", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 240, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "268": { + "question_id": "268", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Colton wants to buy 1+3/10 kilograms of English muffins. How much will he spend? (Unit: $)", + "choices": null, + "answer": "10.4", + "extraction": "5.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "270": { + "question_id": "270", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A and B are three points on \u2299O and AB = AC. Connect BO and CO, if \u2220ABC = 65.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 50\u00b0\n(B) 65\u00b0\n(C) 100\u00b0\n(D) 130\u00b0", + "choices": [ + "50\u00b0", + "65\u00b0", + "100\u00b0", + "130\u00b0" + ], + "answer": "100\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "272": { + "question_id": "272", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:30\n(B) 1:30\n(C) 4:30\n(D) 5:30\n(E) 11:30", + "choices": [ + "9:30", + "1:30", + "4:30", + "5:30", + "11:30" + ], + "answer": "4:30", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9:30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 261, + "img_width": 261, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "274": { + "question_id": "274", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u3001BC\u3001CD\u3001DA\u90fd\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5df2\u77e5AD\uff1d2\uff0cBC\uff1d5\uff0c\u5219AB+CD\u7684\u503c\u662f\uff08\uff09\nChoices:\n(A) 14\n(B) 12\n(C) 9\n(D) 7", + "choices": [ + "14", + "12", + "9", + "7" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "276": { + "question_id": "276", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that the radius of \u2299O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()\nChoices:\n(A) 1mm\n(B) 2mm\n(C) 3mm\n(D) 4mm", + "choices": [ + "1mm", + "2mm", + "3mm", + "4mm" + ], + "answer": "3mm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1mm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 102, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "278": { + "question_id": "278", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following objects, which one has the best PSNR score?\nChoices:\n(A) Lego\n(B) Mats\n(C) Mic\n(D) Ship", + "choices": [ + "Lego", + "Mats", + "Mic", + "Ship" + ], + "answer": "Mic", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lego", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 940, + "img_width": 1478, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "280": { + "question_id": "280", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, ABCDEF is a regular hexagon, and its center is point O. What is the value of x?\nChoices:\n(A) 80\n(B) 60\n(C) 40\n(D) 30\n(E) 20", + "choices": [ + "80", + "60", + "40", + "30", + "20" + ], + "answer": "60", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "282": { + "question_id": "282", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of the sun is showing?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "284": { + "question_id": "284", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "286": { + "question_id": "286", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5c06\u4e00\u6839\u957f\u5ea6\u4e3a8cm\uff0c\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u76ae\u7b4b\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53473cm\u5230\u70b9D\uff0c\u5219\u6b64\u65f6\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 6cm\n(B) 5cm\n(C) 4cm\n(D) 2cm", + "choices": [ + "6cm", + "5cm", + "4cm", + "2cm" + ], + "answer": "2cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "288": { + "question_id": "288", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following value ranges of \u03bb2 does the percentage of Attack Effectiveness begin to be lower than that of Diversity?\nChoices:\n(A) 0.0 - 0.2\n(B) 0.2 - 0.4\n(C) 0.4 - 0.6\n(D) 0.6 - 0.8\n(E) 0.8 - 1.0", + "choices": [ + "0.0 - 0.2", + "0.2 - 0.4", + "0.4 - 0.6", + "0.6 - 0.8", + "0.8 - 1.0" + ], + "answer": "0.0 - 0.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.0 - 0.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 606, + "img_width": 2144, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "290": { + "question_id": "290", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5e73\u884c\u7ebfAB\uff0cCD\u88ab\u76f4\u7ebfAE\u6240\u622a\uff0e\u82e5\u22201\uff1d105\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 75\u00b0\n(B) 85\u00b0\n(C) 95\u00b0\n(D) 105\u00b0", + "choices": [ + "75\u00b0", + "85\u00b0", + "95\u00b0", + "105\u00b0" + ], + "answer": "75\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 132, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "292": { + "question_id": "292", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rebecca Purple greater than Olive Drab?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 461, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "294": { + "question_id": "294", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: In Fig. 21-25, the particles have charges $q_1=-q_2=100 \\mathrm{nC}$ and $q_3=-q_4=200 \\mathrm{nC}$, and distance $a=$ $5.0 \\mathrm{~cm}$. What is the $x$ component of the net electrostatic force on particle 3?", + "choices": null, + "answer": "0.17", + "extraction": "-0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 293, + "img_width": 247, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "296": { + "question_id": "296", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of f(-3) is ____ the value of f(2)\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "298": { + "question_id": "298", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A decrease in rabbits would affect whose food source?\nChoices:\n(A) mountain lion\n(B) producer\n(C) decomposer\n(D) energy", + "choices": [ + "mountain lion", + "producer", + "decomposer", + "energy" + ], + "answer": "mountain lion", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mountain lion", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 699, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "300": { + "question_id": "300", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{HK}$ and $\\overline{IG}$ are diameters of $\\odot L$. Find $m \\widehat {IHJ}$.\nChoices:\n(A) 59\n(B) 135\n(C) 270\n(D) 301", + "choices": [ + "59", + "135", + "270", + "301" + ], + "answer": "270", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "59", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 492, + "img_width": 510, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "302": { + "question_id": "302", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green curve?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a logarithmic function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 300, + "img_width": 531, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "304": { + "question_id": "304", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In the figure above, two line segments meet at a point on line l. If the value of y is equal to the square of the value of x, what is the value of y?", + "choices": null, + "answer": "100", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 431, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "306": { + "question_id": "306", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the bed much larger than the kitten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "308": { + "question_id": "308", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "310": { + "question_id": "310", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z\nChoices:\n(A) 10\n(B) \\frac { 32 } { 3 }\n(C) \\frac { 40 } { 3 }\n(D) \\frac { 50 } { 3 }", + "choices": [ + "10", + "\\frac { 32 } { 3 }", + "\\frac { 40 } { 3 }", + "\\frac { 50 } { 3 }" + ], + "answer": "\\frac { 40 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 218, + "img_width": 350, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "312": { + "question_id": "312", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2003, which crop cost the most per cwt?'\nChoices:\n(A) potatoes\n(B) peas\n(C) apples\n(D) canola", + "choices": [ + "potatoes", + "peas", + "apples", + "canola" + ], + "answer": "apples", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "potatoes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 204, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "314": { + "question_id": "314", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 522, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "316": { + "question_id": "316", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, given that points A, B, and C are on \u2299O, \u2220AOB = 100.0, then the degree of \u2220ACB is ()\nChoices:\n(A) 50\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 200\u00b0", + "choices": [ + "50\u00b0", + "80\u00b0", + "100\u00b0", + "200\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 105, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "318": { + "question_id": "318", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the figure. Round to the nearest tenth if necessary.\nChoices:\n(A) 191.5\n(B) 1128\n(C) 2256\n(D) 4512", + "choices": [ + "191.5", + "1128", + "2256", + "4512" + ], + "answer": "2256", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "191.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 175, + "img_width": 239, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "320": { + "question_id": "320", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0cAB\uff1d13\uff0cAC\uff1d5\uff0cD\u3001E\u5206\u522b\u662fAC\u3001AB\u7684\u4e2d\u70b9\uff0c\u5219DE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 6.5\n(B) 6\n(C) 5.5\n(D) \\frac{\u221a{119}}{2}", + "choices": [ + "6.5", + "6", + "5.5", + "\\frac{\u221a{119}}{2}" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 90, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "322": { + "question_id": "322", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cA\uff0cB\u4e24\u70b9\u88ab\u6c60\u5858\u9694\u5f00\uff0c\u5728AB\u5916\u9009\u4e00\u70b9C\uff0c\u4f7f\u70b9C\u80fd\u76f4\u63a5\u5230\u8fbe\u70b9A\u548c\u70b9B\uff0c\u8fde\u63a5AC\u548cBC\uff0c\u5e76\u5206\u522b\u627e\u51faAC\u548cBC\u7684\u4e2d\u70b9M\uff0cN\uff0e\u5982\u679c\u6d4b\u5f97MN\uff1d20m\uff0c\u90a3\u4e48A\uff0cB\u4e24\u70b9\u7684\u8ddd\u79bb\u662f\uff08\uff09\nChoices:\n(A) 10m\n(B) 20m\n(C) 35m\n(D) 40m", + "choices": [ + "10m", + "20m", + "35m", + "40m" + ], + "answer": "40m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "324": { + "question_id": "324", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between highest and lowest value of dark blue bar?", + "choices": null, + "answer": "53", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 726, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "326": { + "question_id": "326", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 170, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "328": { + "question_id": "328", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm candy for all the datasets?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "330": { + "question_id": "330", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny cubes. Subtract all brown balls. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "332": { + "question_id": "332", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A taxi cab driver tracked how many miles he drove each month. How many miles did the taxi cab driver drive in total in January and April? (Unit: miles)", + "choices": null, + "answer": "7873", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 125, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "334": { + "question_id": "334", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metal tandem bikes in front of the small yellow metallic bicycle than metal bicycles on the left side of the large brown jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "336": { + "question_id": "336", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "338": { + "question_id": "338", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC above, AB = AC, E is the midpoint of line AB, and D is the midpoint of line AC. If AE = x and ED = 4, what is length BC?\nChoices:\n(A) 6\n(B) 8\n(C) 2*x\n(D) 4*x\n(E) 4*x^2", + "choices": [ + "6", + "8", + "2*x", + "4*x", + "4*x^2" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 167, + "img_width": 121, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "340": { + "question_id": "340", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following domains has the most number of BPE Tokens?\nChoices:\n(A) Legal \n(B) Code \n(C) Conversational \n(D) Math \n(E) Science\n(F) Books \n(G) News \n(H) Encyclopedic", + "choices": [ + "Legal ", + "Code ", + "Conversational ", + "Math ", + "Science", + "Books ", + "News ", + "Encyclopedic" + ], + "answer": "Science", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Legal ", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1176, + "img_width": 2142, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "342": { + "question_id": "342", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, which of the following is the greatest?\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d\n(E) e", + "choices": [ + "a", + "b", + "c", + "d", + "e" + ], + "answer": "d", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 299, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "344": { + "question_id": "344", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal cars that are left of the tiny matte school bus greater than the number of tiny cyan double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "346": { + "question_id": "346", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the y-intercept of this function?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 339, + "img_width": 341, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "348": { + "question_id": "348", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the pieces in triangle cuts?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "350": { + "question_id": "350", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 89, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "352": { + "question_id": "352", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people will fit in the smaller vehicle?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "354": { + "question_id": "354", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 90?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "356": { + "question_id": "356", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big motorbikes than rubber choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "358": { + "question_id": "358", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is the same as the unfolded cube?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "A", + "prediction": "A", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 517, + "img_width": 326, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "360": { + "question_id": "360", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $\\frac{I J}{X J}=\\frac{HJ}{YJ}, m \\angle W X J=130$\r\nand $m \\angle WZG=20,$ find $m \\angle YIZ$\nChoices:\n(A) 40\n(B) 50\n(C) 65\n(D) 110", + "choices": [ + "40", + "50", + "65", + "110" + ], + "answer": "50", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 370, + "img_width": 721, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "362": { + "question_id": "362", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan cylinders. Subtract all tiny purple rubber objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "364": { + "question_id": "364", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and points C and D are on \u2299O. If \u2220ABD = 50.0, then the degree of \u2220BCD is ()\nChoices:\n(A) 30\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "30\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "366": { + "question_id": "366", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 320, + "img_width": 250, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "368": { + "question_id": "368", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow matte school buss greater than the number of big yellow metal cars?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "370": { + "question_id": "370", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, if the number of ferns decrease, the supply of salmon will most likely?\nChoices:\n(A) decrease\n(B) can't tell\n(C) stay same\n(D) increase", + "choices": [ + "decrease", + "can't tell", + "stay same", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 680, + "img_width": 880, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "372": { + "question_id": "372", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small gray spheres. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "374": { + "question_id": "374", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms calf and ivory?", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "376": { + "question_id": "376", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte cubes. Subtract all tiny gray metal cubes. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "378": { + "question_id": "378", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAD\u662f\u25b3ABC\u7684\u4e2d\u7ebf\uff0cE\u4e3aAD\u7684\u4e2d\u70b9\uff0c\u25b3ABE\u7684\u9762\u79ef\u4e3a2\uff0c\u5219\u25b3ABC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8", + "choices": [ + "5", + "6", + "7", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 118, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "380": { + "question_id": "380", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For how many years that the percentage value over 4?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "382": { + "question_id": "382", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the building through the window at least five stories tall?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 400, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "384": { + "question_id": "384", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 495, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "386": { + "question_id": "386", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 5\n(B) 10\n(C) 10 \\sqrt { 3 }\n(D) 20", + "choices": [ + "5", + "10", + "10 \\sqrt { 3 }", + "20" + ], + "answer": "10 \\sqrt { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 164, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "388": { + "question_id": "388", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Express the ratio of $\\tan M$ as a decimal to the nearest hundredth.\nChoices:\n(A) 0.38\n(B) 0.42\n(C) 0.92\n(D) 2.40", + "choices": [ + "0.38", + "0.42", + "0.92", + "2.40" + ], + "answer": "0.42", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.38", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 209, + "img_width": 342, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "390": { + "question_id": "390", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer jets that are left of the small brown suv than objects right of the big shiny car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "392": { + "question_id": "392", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mr. Huffman, a P.E. teacher, wrote down how much weight each of his students could lift. How many people lifted at least 46 pounds? (Unit: people)", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "394": { + "question_id": "394", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following environments has the least GPU days for training?\nChoices:\n(A) HomeGrid\n(B) Msgr S1\n(C) Msgr S2\n(D) Msgr S3\n(E) VLN\n(F) LangRoom", + "choices": [ + "HomeGrid", + "Msgr S1", + "Msgr S2", + "Msgr S3", + "VLN", + "LangRoom" + ], + "answer": "LangRoom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "HomeGrid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 858, + "img_width": 1854, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "396": { + "question_id": "396", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the algae dies then water flea population will\nChoices:\n(A) remains the same\n(B) decrease\n(C) increase\n(D) NA", + "choices": [ + "remains the same", + "decrease", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 576, + "img_width": 720, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "398": { + "question_id": "398", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 942, + "img_width": 727, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "400": { + "question_id": "400", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which Episode ID does the Retroformer attain its peak Success rate (%)?\nChoices:\n(A) 1.0\n(B) 1.5\n(C) 2.0\n(D) 2.5\n(E) 3.0\n(F) 3.5\n(G) 4.0", + "choices": [ + "1.0", + "1.5", + "2.0", + "2.5", + "3.0", + "3.5", + "4.0" + ], + "answer": "4.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 942, + "img_width": 1196, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "402": { + "question_id": "402", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food chain diagram below, which animal would most directly lack food if Grasshoppers get exterminated?\nChoices:\n(A) Rabbit\n(B) Deer\n(C) Frogs\n(D) Wolf", + "choices": [ + "Rabbit", + "Deer", + "Frogs", + "Wolf" + ], + "answer": "Frogs", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rabbit", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 735, + "img_width": 909, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "404": { + "question_id": "404", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which activity begins at 11.50 A.M.?'\nChoices:\n(A) figure skating practice\n(B) private class\n(C) adult class\n(D) children's class", + "choices": [ + "figure skating practice", + "private class", + "adult class", + "children's class" + ], + "answer": "children's class", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "figure skating practice", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 217, + "img_width": 325, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "406": { + "question_id": "406", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many snowmen are there?", + "choices": null, + "answer": "15", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 183, + "img_width": 714, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "408": { + "question_id": "408", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 6\n(B) 6 \\sqrt { 2 }\n(C) 6 \\sqrt { 3 }\n(D) 6 \\sqrt { 5 }", + "choices": [ + "6", + "6 \\sqrt { 2 }", + "6 \\sqrt { 3 }", + "6 \\sqrt { 5 }" + ], + "answer": "6 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 238, + "img_width": 362, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "410": { + "question_id": "410", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of $\\triangle D E F,$ if $\\triangle D E F \\sim \\triangle C B F,$ perimeter of $\\triangle C B F=27, D F=6,$ and $F C=8$\nChoices:\n(A) 20.25\n(B) 21\n(C) 27\n(D) 36", + "choices": [ + "20.25", + "21", + "27", + "36" + ], + "answer": "20.25", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20.25", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 226, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "412": { + "question_id": "412", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tanner has $35. Does he have enough to buy a black jacket and a pair of shorts?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 192, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "414": { + "question_id": "414", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $ST=8, TR=4$, and $PT=6$, find $QR$.\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 10", + "choices": [ + "6", + "8", + "9", + "10" + ], + "answer": "9", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 386, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "416": { + "question_id": "416", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest volume written on the blender?", + "choices": null, + "answer": "800", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "418": { + "question_id": "418", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the number of grasshoppers decreases, what will the population of spiders most likely do?\nChoices:\n(A) remain the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remain the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "420": { + "question_id": "420", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the lowest value on the Y axis?", + "choices": null, + "answer": "0.0", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 1763, + "img_width": 2256, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "422": { + "question_id": "422", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "10", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "424": { + "question_id": "424", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the food half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "426": { + "question_id": "426", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u82e5DE\u662f\u25b3ABC\u7684\u4e2d\u4f4d\u7ebf\uff0c\u25b3ADE\u7684\u5468\u957f\u4e3a1\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "428": { + "question_id": "428", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "28", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 968, + "img_width": 1259, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "430": { + "question_id": "430", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=0 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "432": { + "question_id": "432", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of undernourished male children greater than 0.4 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1085, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "434": { + "question_id": "434", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, side AC of triangle ABC is on line l. What is x in terms of k?\nChoices:\n(A) 60-k\n(B) k\n(C) 60+k\n(D) 120-k\n(E) 120-2*k", + "choices": [ + "60-k", + "k", + "60+k", + "120-k", + "120-2*k" + ], + "answer": "60-k", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60-k", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 157, + "img_width": 215, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "436": { + "question_id": "436", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "438": { + "question_id": "438", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 367, + "img_width": 329, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "440": { + "question_id": "440", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the white plate half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "442": { + "question_id": "442", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "444": { + "question_id": "444", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the two genders?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "446": { + "question_id": "446", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u662f\u25b3ABC\u7684\u5185\u5fc3\uff0c\u8fde\u63a5DB\uff0cDC\uff0c\u8fc7\u70b9D\u4f5cEF\u2225BC\u5206\u522b\u4ea4AB\u3001AC\u4e8e\u70b9E\u3001F\uff0c\u82e5BE+CF\uff1d8\uff0c\u5219EF\u7684\u957f\u5ea6\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 8\n(D) 16", + "choices": [ + "4", + "5", + "8", + "16" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "448": { + "question_id": "448", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year recorded the highest share of Urban secondary schools with access to electricity in India?", + "choices": null, + "answer": "2016", + "extraction": "2016", + "prediction": "2016", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "450": { + "question_id": "450", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the grass died, what would be most affected?\nChoices:\n(A) garter snakes\n(B) hognose snakes\n(C) hawks\n(D) grasshoppers", + "choices": [ + "garter snakes", + "hognose snakes", + "hawks", + "grasshoppers" + ], + "answer": "grasshoppers", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "garter snakes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "452": { + "question_id": "452", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most likely equilibrium population count?\nChoices:\n(A) 40\n(B) 60\n(C) 80\n(D) 100", + "choices": [ + "40", + "60", + "80", + "100" + ], + "answer": "80", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 366, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "454": { + "question_id": "454", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "456": { + "question_id": "456", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "458": { + "question_id": "458", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: If you add the two visible numbers, on the jerseys, what is the total sum?", + "choices": null, + "answer": "3", + "extraction": "23", + "prediction": "23", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "460": { + "question_id": "460", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were fewer leaves in this ecosystem, the first organism to experience change as a result would be:\nChoices:\n(A) Frogs\n(B) Crickets\n(C) Snakes\n(D) Hawks", + "choices": [ + "Frogs", + "Crickets", + "Snakes", + "Hawks" + ], + "answer": "Crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Frogs", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "462": { + "question_id": "462", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 100?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "464": { + "question_id": "464", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1316, + "img_width": 1000, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "466": { + "question_id": "466", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Periwinkle intersect Yellow Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 487, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "468": { + "question_id": "468", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "470": { + "question_id": "470", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following models has the lowest KS Rollout Loss overall?\nChoices:\n(A) Baseline\n(B) Diffusion\n(C) PDE-Refiner\n(D) Pushforward", + "choices": [ + "Baseline", + "Diffusion", + "PDE-Refiner", + "Pushforward" + ], + "answer": "PDE-Refiner", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Baseline", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 854, + "img_width": 1422, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "472": { + "question_id": "472", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "474": { + "question_id": "474", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "476": { + "question_id": "476", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If frogs were removed from this environment what animal would potentially see an increase in its population?\nChoices:\n(A) crickets\n(B) deer\n(C) snakes\n(D) hawks", + "choices": [ + "crickets", + "deer", + "snakes", + "hawks" + ], + "answer": "crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "crickets", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 518, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "478": { + "question_id": "478", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 5", + "choices": [ + "10", + "8", + "6", + "5" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "480": { + "question_id": "480", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny blue metal bicycles behind the small sedan less than the number of purple fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "482": { + "question_id": "482", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, triangle ABC is inscribed in the circle with center O and diameter AC. If AB = AO, what is the degree measure of angle ABO?\nChoices:\n(A) 15*\\degree\n(B) 30*\\degree\n(C) 45*\\degree\n(D) 60*\\degree\n(E) 90*\\degree", + "choices": [ + "15*\\degree", + "30*\\degree", + "45*\\degree", + "60*\\degree", + "90*\\degree" + ], + "answer": "60*\\degree", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15*\\degree", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 134, + "img_width": 143, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "484": { + "question_id": "484", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "486": { + "question_id": "486", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1d5\uff0cAD\uff1d7\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 12\n(B) 14\n(C) 35\n(D) 24", + "choices": [ + "12", + "14", + "35", + "24" + ], + "answer": "24", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "488": { + "question_id": "488", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown things. Subtract all tiny blue metallic objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "490": { + "question_id": "490", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u3001B\u5728\u540c\u4e00\u76f4\u7ebf\u4e0a\uff0cDC\u22a5EC\uff0c\u82e5\u2220BCD\uff1d40\u00b0\uff0c\u5219\u2220ACE\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 88, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "492": { + "question_id": "492", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the \u2299O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. Connect AD, given that \u2220DAC = 30.0, the length of the line segment CD is ()\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "1", + "\u221a{3}", + "2", + "2\u221a{3}" + ], + "answer": "2\u221a{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 158, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "494": { + "question_id": "494", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "496": { + "question_id": "496", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "498": { + "question_id": "498", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the water half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 478, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "500": { + "question_id": "500", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1236, + "img_width": 987, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "502": { + "question_id": "502", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tandem bikes that are behind the brown metal bicycle than matte trucks on the left side of the green object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "504": { + "question_id": "504", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, D and E are the points on the edges AB and AC of \u25b3ABC, DE \u2225 BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4", + "choices": [ + "10", + "8", + "6", + "4" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "506": { + "question_id": "506", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?", + "choices": null, + "answer": "[2014, 2016]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "508": { + "question_id": "508", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The owner of a bed and breakfast inn recalled how many guests the inn had hosted each day. What is the median of the numbers?'", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "510": { + "question_id": "510", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{5}", + "choices": [ + "\\frac{2}{3}", + "\\frac{3}{5}", + "\\frac{3}{4}", + "\\frac{4}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 186, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "512": { + "question_id": "512", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the y coordinate of the center of mass of the isosceles right triangle of uniform areal density shown in Figure 9-C?", + "choices": null, + "answer": "0.24", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 356, + "img_width": 497, + "language": "english", + "skills": [ + "geometry reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "514": { + "question_id": "514", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you wanted the leaf with the least main veins, which would you choose?\nChoices:\n(A) 3 main veins\n(B) pinnate\n(C) reticulate\n(D) palmate", + "choices": [ + "3 main veins", + "pinnate", + "reticulate", + "palmate" + ], + "answer": "3 main veins", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3 main veins", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 236, + "img_width": 559, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "516": { + "question_id": "516", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most the stepping stones square?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 339, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "518": { + "question_id": "518", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2211, + "img_width": 2838, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "520": { + "question_id": "520", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 741, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "522": { + "question_id": "522", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 86, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "524": { + "question_id": "524", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The Kingwood Ski Resort asked its guests how many times they went sledding last winter. How many guests went sledding more than 2 times?'", + "choices": null, + "answer": "0", + "extraction": "11", + "prediction": "11", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 163, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "526": { + "question_id": "526", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn", + "choices": [ + "slide", + "flip", + "turn" + ], + "answer": "slide", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "slide", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 104, + "img_width": 253, + "language": "english", + "skills": [ + "geometry reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "528": { + "question_id": "528", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBD\u22a5CF\uff0c\u5782\u8db3\u4e3aB\uff0c\u2220ABF\uff1d35\u00b0\uff0c\u5219\u2220BDC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 45\u00b0\n(D) 55\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "45\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 135, + "img_width": 194, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "530": { + "question_id": "530", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The advertising agency counted the number of billboards in each city in the state. How many cities have fewer than 70 billboards? (Unit: cities)", + "choices": null, + "answer": "9", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 140, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "532": { + "question_id": "532", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer gray trucks that are in front of the large aeroplane than big yellow metal objects in front of the purple object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "534": { + "question_id": "534", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of stunted female children greater than the average percentage of stunted female children taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 883, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "536": { + "question_id": "536", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are on \u2299O, if \u2220C = 35.0, then \u2220AOB = ()\nChoices:\n(A) 17.5\u00b0\n(B) 35\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "17.5\u00b0", + "35\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "17.5\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "538": { + "question_id": "538", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. If AB = 6.0, the area of \u200b\u200bthe ring is ()\nChoices:\n(A) 9\u03c0\n(B) 6\u03c0\n(C) 3\u03c0\n(D) \u03c0", + "choices": [ + "9\u03c0", + "6\u03c0", + "3\u03c0", + "\u03c0" + ], + "answer": "9\u03c0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9\u03c0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "540": { + "question_id": "540", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5", + "choices": [ + "3/11", + "8/11", + "6/11", + "3/5" + ], + "answer": "3/11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3/11", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 103, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "542": { + "question_id": "542", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the figure achieve an Acc score greater than 60?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scatter plot", + "grade": "college", + "img_height": 1358, + "img_width": 1690, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "544": { + "question_id": "544", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total percentage of people who say that they do either less or more often than the usual amount of exercise during the coronavirus pandemic in the United States as of April 2020?", + "choices": null, + "answer": "44", + "extraction": "77", + "prediction": "77", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "546": { + "question_id": "546", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the overall ratio of male to female?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "548": { + "question_id": "548", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer cyan jets than big buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "550": { + "question_id": "550", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with highest accuracy?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "552": { + "question_id": "552", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many queries have a p-value lower than 0.50?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 330, + "img_width": 1726, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "554": { + "question_id": "554", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 488, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "556": { + "question_id": "556", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large red metallic things that are on the left side of the cyan shiny scooter than things that are in front of the small jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "558": { + "question_id": "558", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "560": { + "question_id": "560", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "562": { + "question_id": "562", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green cubes. Subtract all large cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "564": { + "question_id": "564", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest time required to import ?", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1056, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "566": { + "question_id": "566", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u25b3ABC\u224c\u25b3DEF\uff0cCD\u5e73\u5206\u2220BCA\uff0c\u82e5\u2220A\uff1d22\u00b0\uff0c\u2220CGF\uff1d88\u00b0\uff0c\u5219\u2220E\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 26\u00b0\n(B) 28\u00b0\n(C) 30\u00b0\n(D) 34\u00b0", + "choices": [ + "26\u00b0", + "28\u00b0", + "30\u00b0", + "34\u00b0" + ], + "answer": "26\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 89, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "568": { + "question_id": "568", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an economics project, Colleen determined the cost of ferry rides for bicycles and cars. How much higher is the fare for a car on the Mukilteu-Clinton ferry than on the Southport-Fort Fisher ferry? (Unit: $)", + "choices": null, + "answer": "2", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 349, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "570": { + "question_id": "570", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte blocks. Subtract all brown things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "572": { + "question_id": "572", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function start decreasing?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 316, + "img_width": 400, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "574": { + "question_id": "574", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do you see the figures inside these boxes? They form a pattern. Choose the figure in the answer row below that continues the pattern.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5", + "choices": [ + "1", + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 378, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "576": { + "question_id": "576", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human brain is the largest and most anterior part of each cerebral hemisphere?\nChoices:\n(A) motor cortex\n(B) occipital lobe\n(C) temporal lobe\n(D) frontal lobe", + "choices": [ + "motor cortex", + "occipital lobe", + "temporal lobe", + "frontal lobe" + ], + "answer": "frontal lobe", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "motor cortex", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 625, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "578": { + "question_id": "578", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9567", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "580": { + "question_id": "580", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 650, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "582": { + "question_id": "582", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Rebecca Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "584": { + "question_id": "584", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A philanthropic organization compared the amounts of money that its members donated to certain causes. Who donated more money to arts education, Aubrey or Connor?'\nChoices:\n(A) Connor\n(B) Aubrey", + "choices": [ + "Connor", + "Aubrey" + ], + "answer": "Connor", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Connor", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 391, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "586": { + "question_id": "586", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, rotate \u25b3ABC clockwise around point A by 90.0 to obtain \u25b3AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. If \u2220CC\u2032B\u2032 = 32.0, then the size of \u2220AC\u2032B\u2032 is ()\nChoices:\n(A) 32\u00b0\n(B) 45\u00b0\n(C) 13\u00b0\n(D) 30\u00b0", + "choices": [ + "32\u00b0", + "45\u00b0", + "13\u00b0", + "30\u00b0" + ], + "answer": "13\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 80, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "588": { + "question_id": "588", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has more actual total income?", + "choices": null, + "answer": "1982", + "extraction": "1975", + "prediction": "1975", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2281, + "img_width": 1785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "590": { + "question_id": "590", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 264, + "img_width": 376, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "592": { + "question_id": "592", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the global maximum of this function?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 318, + "img_width": 283, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "594": { + "question_id": "594", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the expenditure per student in Jamaica have the greatest increase?", + "choices": null, + "answer": "2005", + "extraction": "2011", + "prediction": "2011", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "596": { + "question_id": "596", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 407, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "598": { + "question_id": "598", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most curved beak species?\nChoices:\n(A) iiki\n(B) swallow-tanager\n(C) cliff swallow\n(D) hawfinch", + "choices": [ + "iiki", + "swallow-tanager", + "cliff swallow", + "hawfinch" + ], + "answer": "iiki", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "iiki", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 463, + "img_width": 593, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "600": { + "question_id": "600", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 637, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "602": { + "question_id": "602", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Rectangle ABCD is subdivided into two identical square regions, as in the figure above. If the area of each square is 9, what is the perimeter of ABCD?", + "choices": null, + "answer": "18", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 219, + "img_width": 435, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "604": { + "question_id": "604", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "606": { + "question_id": "606", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 209, + "img_width": 335, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "608": { + "question_id": "608", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does South Carolina have the highest value in the South ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "610": { + "question_id": "610", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, P, Q, and R lie on the same line. P is the center of the larger circle, and Q is the center of the smaller circle. If the radius of the larger circle is 4, what is the radius of the smaller circle?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 8\n(E) 16", + "choices": [ + "1", + "2", + "4", + "8", + "16" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 411, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "612": { + "question_id": "612", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal things. Subtract all tiny objects. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "614": { + "question_id": "614", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "40", + "prediction": "40", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 661, + "img_width": 915, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "616": { + "question_id": "616", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of instagram to google?", + "choices": null, + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "618": { + "question_id": "618", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "620": { + "question_id": "620", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "622": { + "question_id": "622", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cD\u662fBC\u4e0a\u7684\u70b9\uff0c\u4e14BD\uff1d2\uff0cDC\uff1d1\uff0cS\u25b3ACD\uff1d12\uff0c\u90a3\u4e48S\u25b3ABC\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 30\n(B) 36\n(C) 72\n(D) 24", + "choices": [ + "30", + "36", + "72", + "24" + ], + "answer": "36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 146, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "624": { + "question_id": "624", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the total unemployed labor force in Upper middle income greater than 1.6 %?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1344, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "626": { + "question_id": "626", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown objects. Subtract all large purple cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "628": { + "question_id": "628", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u2220ABC\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9E\uff0c\u2220BCD\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9F\uff0c\u82e5AB\uff1d3\uff0cAD\uff1d4\uff0c\u5219EF\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.5\n(D) 3", + "choices": [ + "1", + "2", + "2.5", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "630": { + "question_id": "630", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the size of angle MBD in the figure below.", + "choices": null, + "answer": "72", + "extraction": "66", + "prediction": "66", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 195, + "img_width": 340, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "632": { + "question_id": "632", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total value of the More bar?", + "choices": null, + "answer": "52", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 350, + "img_width": 309, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "634": { + "question_id": "634", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfAB\uff0cCD\u4ea4\u4e8e\u70b9O\uff0e\u5c04\u7ebfOE\u5e73\u5206\u2220BOC\uff0c\u82e5\u2220AOD\uff1d70\u00b0\uff0c\u5219\u2220AOE\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 110\u00b0\n(C) 135\u00b0\n(D) 145\u00b0", + "choices": [ + "35\u00b0", + "110\u00b0", + "135\u00b0", + "145\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 173, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "636": { + "question_id": "636", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "34", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 92, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "638": { + "question_id": "638", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the under-5 male mortality rate greater than the average under-5 male mortality rate taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 880, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "640": { + "question_id": "640", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $\\widehat{\\mathrm{WN}}$ if $\\triangle \\mathrm{IWN}$ is equilateral and $W N=5$\nChoices:\n(A) \\frac { 3 } { 5 } \\pi\n(B) \\frac { 5 } { 3 } \\pi\n(C) 5 \\pi\n(D) 10 \\pi", + "choices": [ + "\\frac { 3 } { 5 } \\pi", + "\\frac { 5 } { 3 } \\pi", + "5 \\pi", + "10 \\pi" + ], + "answer": "\\frac { 5 } { 3 } \\pi", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 3 } { 5 } \\pi", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 222, + "img_width": 309, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "642": { + "question_id": "642", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Line AB is tangent to circle O. If AB = 8 and OB = 10, find the diameter of the circle.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10\n(E) 12", + "choices": [ + "4", + "6", + "8", + "10", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 443, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "644": { + "question_id": "644", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing number in the picture?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 11", + "choices": [ + "6", + "8", + "10", + "11" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 452, + "img_width": 494, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "646": { + "question_id": "646", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The employee at the department store counted the number of ties on each tie rack. How many racks have at least 0 ties? (Unit: racks)", + "choices": null, + "answer": "25", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "648": { + "question_id": "648", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the minimum value of this function?", + "choices": null, + "answer": "-1", + "extraction": "-1", + "prediction": "-1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "650": { + "question_id": "650", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of maximum employment rate and minimum employment?", + "choices": null, + "answer": "31.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "652": { + "question_id": "652", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 365, + "img_width": 845, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "654": { + "question_id": "654", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metallic motorbikes that are in front of the small brown metal dirtbike than big yellow dirtbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "656": { + "question_id": "656", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 776, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "658": { + "question_id": "658", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 115, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "660": { + "question_id": "660", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small fighters than yellow matte tandem bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "662": { + "question_id": "662", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "80", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "664": { + "question_id": "664", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number of responses for Question 10, for any given % of inside sales?", + "choices": null, + "answer": "17", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2245, + "img_width": 1692, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "666": { + "question_id": "666", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red objects. Subtract all big green things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "668": { + "question_id": "668", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "670": { + "question_id": "670", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which date of Meeting was the most number of shares transferred?\nChoices:\n(A) 04/06/2005\n(B) 04/02/2005\n(C) 04/05/2005\n(D) 04/03/2005\n(E) 04/04/2005", + "choices": [ + "04/06/2005", + "04/02/2005", + "04/05/2005", + "04/03/2005", + "04/04/2005" + ], + "answer": "04/02/2005", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "04/06/2005", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2135, + "img_width": 1582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "672": { + "question_id": "672", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 169, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "674": { + "question_id": "674", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, CDE is an equilateral triangle and ABCE is a square with an area of 1. What is the perimeter of polygon ABCDE?\nChoices:\n(A) 4\n(B) 5\n(C) 6\n(D) 7\n(E) 8", + "choices": [ + "4", + "5", + "6", + "7", + "8" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "676": { + "question_id": "676", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "678": { + "question_id": "678", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 21\n(B) 34\n(C) 58\n(D) 67", + "choices": [ + "21", + "34", + "58", + "67" + ], + "answer": "58", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 267, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "680": { + "question_id": "680", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 303, + "img_width": 440, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "682": { + "question_id": "682", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the grass dies then population of squirrel will\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remains the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 592, + "img_width": 864, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "684": { + "question_id": "684", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{CH} \\cong \\overline{KJ}$. Find $x$.\nChoices:\n(A) 27\n(B) 54\n(C) 55\n(D) 83", + "choices": [ + "27", + "54", + "55", + "83" + ], + "answer": "55", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "27", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 444, + "img_width": 608, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "686": { + "question_id": "686", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function invertible?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 442, + "img_width": 731, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "688": { + "question_id": "688", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the minimum age group shown in the \u2018plots\u2019?\nChoices:\n(A) 11-15\n(B) 21-25\n(C) 6-10\n(D) 16-20\n(E) 0-5", + "choices": [ + "11-15", + "21-25", + "6-10", + "16-20", + "0-5" + ], + "answer": "0-5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "11-15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2136, + "img_width": 3160, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "690": { + "question_id": "690", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, lines M and N are parallel. All of the following are true except\nChoices:\n(A) a + b = j + l\n(B) g = h\n(C) c + f = f + b\n(D) g + e + f + h = 360\n(E) d + e = f + j", + "choices": [ + "a + b = j + l", + "g = h", + "c + f = f + b", + "g + e + f + h = 360", + "d + e = f + j" + ], + "answer": "d + e = f + j", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a + b = j + l", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 558, + "img_width": 625, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "692": { + "question_id": "692", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain if grasses dried up in summer, what is likely to happen?\nChoices:\n(A) Grasshoppers will decrease.\n(B) shrews will become extinct\n(C) owls will increase.\n(D) None of the above", + "choices": [ + "Grasshoppers will decrease.", + "shrews will become extinct", + "owls will increase.", + "None of the above" + ], + "answer": "Grasshoppers will decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshoppers will decrease.", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 189, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "694": { + "question_id": "694", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u83f1\u5f62ABCD\u4e2d\uff0cM\u3001N\u5206\u522b\u662fBC\u548cCD\u7684\u4e2d\u70b9\uff0cNP\u22a5AB\u4e8e\u70b9P\uff0c\u8fde\u63a5MP\uff0e\u82e5\u2220DAB\uff1d40\u00b0\uff0c\u5219\u2220MPB\uff1d\uff08\uff09\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 115\u00b0\n(D) 110\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "115\u00b0", + "110\u00b0" + ], + "answer": "110\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 158, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "696": { + "question_id": "696", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Erica has $1,525.00. Does she have enough to buy a motorcycle and a canoe?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 214, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "698": { + "question_id": "698", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle in the figure above, what is the value of x?\nChoices:\n(A) 2*\\sqrt{3}\n(B) 6*\\sqrt{2}\n(C) 6*\\sqrt{3}\n(D) 6\n(E) 12", + "choices": [ + "2*\\sqrt{3}", + "6*\\sqrt{2}", + "6*\\sqrt{3}", + "6", + "12" + ], + "answer": "2*\\sqrt{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2*\\sqrt{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 376, + "img_width": 615, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "700": { + "question_id": "700", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u2299O\u662f\u25b3ABC\u7684\u5916\u63a5\u5706\uff0cAB\uff1dBC\uff1d4\uff0c\u628a\u5f27AB\u6cbf\u5f26AB\u5411\u4e0b\u6298\u53e0\u4ea4BC\u4e8e\u70b9D\uff0c\u82e5\u70b9D\u4e3aBC\u4e2d\u70b9\uff0c\u5219AC\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2\u221a{2}\n(D) \u221a{6}", + "choices": [ + "1", + "2", + "2\u221a{2}", + "\u221a{6}" + ], + "answer": "2\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "702": { + "question_id": "702", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP A\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "400", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "704": { + "question_id": "704", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two puzzle pieces form the larger square?\nChoices:\n(A) 1 & 2\n(B) 1 & 3\n(C) 1 & 4\n(D) 2 & 3\n(E) 2 & 4", + "choices": [ + "1 & 2", + "1 & 3", + "1 & 4", + "2 & 3", + "2 & 4" + ], + "answer": "1 & 3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1 & 2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 440, + "img_width": 396, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "706": { + "question_id": "706", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the image of the dot (8,-2) under a clockwise rotation by 270\u00b0 about the origin.\"\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 432, + "img_width": 438, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "708": { + "question_id": "708", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB \u2225 CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().\nChoices:\n(A) \\frac{6}{5}\n(B) \\frac{7}{6}\n(C) \\frac{9}{5}\n(D) \\frac{15}{2}", + "choices": [ + "\\frac{6}{5}", + "\\frac{7}{6}", + "\\frac{9}{5}", + "\\frac{15}{2}" + ], + "answer": "\\frac{9}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{6}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 156, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "710": { + "question_id": "710", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1555, + "img_width": 2293, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "712": { + "question_id": "712", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 244, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "714": { + "question_id": "714", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large brown rubber motorbikes in front of the big motorbike greater than the number of big green sedans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "716": { + "question_id": "716", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 16 \\sqrt { 2 }\n(B) 16 \\sqrt { 3 }\n(C) 32\n(D) 16 \\sqrt { 5 }", + "choices": [ + "16 \\sqrt { 2 }", + "16 \\sqrt { 3 }", + "32", + "16 \\sqrt { 5 }" + ], + "answer": "16 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16 \\sqrt { 2 }", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 196, + "img_width": 427, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "718": { + "question_id": "718", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Jeffrey is the proud owner of an eclectic bow tie collection. He keeps track of how many bow ties he has, and organizes them by pattern and material. What is the probability that a randomly selected bow tie is designed with swirls and is made of velvet? Simplify any fractions.'", + "choices": null, + "answer": "0.21", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 94, + "img_width": 215, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "720": { + "question_id": "720", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function value first reach 2?", + "choices": null, + "answer": "2", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 362, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "722": { + "question_id": "722", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "724": { + "question_id": "724", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rebecca Purple have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 638, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "726": { + "question_id": "726", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 10\n(B) 30\n(C) 90\n(D) 120", + "choices": [ + "10", + "30", + "90", + "120" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 228, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "728": { + "question_id": "728", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 69, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "730": { + "question_id": "730", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year the market share of KLA is highest?", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "732": { + "question_id": "732", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism would be most affected if there was a shortage of plants?\nChoices:\n(A) Grasshopper\n(B) Snake\n(C) Mouse\n(D) Hawk", + "choices": [ + "Grasshopper", + "Snake", + "Mouse", + "Hawk" + ], + "answer": "Grasshopper", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshopper", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "734": { + "question_id": "734", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer double buss that are behind the aeroplane than things on the left side of the yellow double bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "736": { + "question_id": "736", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u76f4\u7ebfa\u2225b\uff0c\u76f4\u89d2\u4e09\u89d2\u5f62ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5\u2220B\uff1d58\u00b0\uff0c\u90a3\u4e48\u22201\ufe63\u22202\uff1d\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 58\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "58\u00b0" + ], + "answer": "32\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 154, + "img_width": 226, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "738": { + "question_id": "738", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 268, + "img_width": 383, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "740": { + "question_id": "740", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What percent of the stands are full?\nChoices:\n(A) 15\n(B) 100\n(C) 50\n(D) 50", + "choices": [ + "15", + "100", + "50", + "50" + ], + "answer": "15", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "742": { + "question_id": "742", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 159, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "744": { + "question_id": "744", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If RL = 5, RT = 9, and WS = 6, find RW.\nChoices:\n(A) 5.4\n(B) 6\n(C) 6.6\n(D) 7.5", + "choices": [ + "5.4", + "6", + "6.6", + "7.5" + ], + "answer": "7.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 404, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "746": { + "question_id": "746", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mrs. Zimmerman hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between 2013 and 2014? (Unit: entries per year)", + "choices": null, + "answer": "7", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 199, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "748": { + "question_id": "748", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangents of \u2299O, the tangent point of point A and B, AC is the diameter of \u2299O, given that \u2220P = 50.0, then the size of \u2220ACB is ()\nChoices:\n(A) 65\u00b0\n(B) 60\u00b0\n(C) 55\u00b0\n(D) 50\u00b0", + "choices": [ + "65\u00b0", + "60\u00b0", + "55\u00b0", + "50\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 207, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "750": { + "question_id": "750", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "18", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 356, + "img_width": 290, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "752": { + "question_id": "752", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cPA\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5207\u70b9\u4e3aA\uff0cOP\uff1d4\uff0c\u2220APO\uff1d30\u00b0\uff0c\u5219\u2299O\u7684\u534a\u5f84\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 3", + "choices": [ + "1", + "\u221a{3}", + "2", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 122, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "754": { + "question_id": "754", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. What will happen to fish population if algae's are decreased?\nChoices:\n(A) Population will decrease\n(B) Population will remain the same\n(C) Population will increase\n(D) None of the above", + "choices": [ + "Population will decrease", + "Population will remain the same", + "Population will increase", + "None of the above" + ], + "answer": "Population will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Population will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 364, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "756": { + "question_id": "756", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the trees died, the population of porcupine would most likely\nChoices:\n(A) double\n(B) skyrocket\n(C) decrease\n(D) increase", + "choices": [ + "double", + "skyrocket", + "decrease", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "double", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 591, + "img_width": 765, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "758": { + "question_id": "758", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny purple trucks behind the small matte motorbike less than the number of fighters that are behind the big metal utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "760": { + "question_id": "760", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow tandem bikes less than the number of big objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "762": { + "question_id": "762", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of symmetry of this function?\nChoices:\n(A) (0, 0)\n(B) (-1, 0)\n(C) (2, 0)", + "choices": [ + "(0, 0)", + "(-1, 0)", + "(2, 0)" + ], + "answer": "(0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "764": { + "question_id": "764", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of bananas on each stock?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 349, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "766": { + "question_id": "766", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red trucks than small blue bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "768": { + "question_id": "768", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the hottest on average in Rome?\nChoices:\n(A) December, January, and February\n(B) July and August\n(C) March and April", + "choices": [ + "December, January, and February", + "July and August", + "March and April" + ], + "answer": "July and August", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "December, January, and February", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 448, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "770": { + "question_id": "770", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the amplitude of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "772": { + "question_id": "772", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow shiny motorbikes greater than the number of red rubber fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "774": { + "question_id": "774", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large matte utility bikes than small yellow bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "776": { + "question_id": "776", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $JQ$ if $Q$ is the incenter of $\\triangle JLN$. Rounded to the nearest hundredth.\nChoices:\n(A) 16.50\n(B) 18.79\n(C) 20.32\n(D) 25.50", + "choices": [ + "16.50", + "18.79", + "20.32", + "25.50" + ], + "answer": "18.79", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16.50", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 424, + "img_width": 589, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "778": { + "question_id": "778", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you find the missing shape in this picture puzzle?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "A", + "extraction": "A", + "prediction": "A", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 431, + "img_width": 797, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "780": { + "question_id": "780", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "7", + "extraction": "7", + "prediction": "7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 209, + "img_width": 848, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "782": { + "question_id": "782", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "4", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 376, + "img_width": 384, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "784": { + "question_id": "784", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Across all years, what is the maximum rating of statistical capacity in Maldives ?", + "choices": null, + "answer": "70", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 938, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "786": { + "question_id": "786", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle K$\nChoices:\n(A) 6\n(B) 60\n(C) 100\n(D) 180", + "choices": [ + "6", + "60", + "100", + "180" + ], + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 317, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "788": { + "question_id": "788", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 332, + "img_width": 515, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "790": { + "question_id": "790", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cN\u662fBC\u8fb9\u4e0a\u7684\u4e2d\u70b9\uff0cAM\u5e73\u5206\u2220BAC\uff0cBM\u22a5AM\u4e8e\u70b9M\uff0c\u82e5AB\uff1d8\uff0cMN\uff1d2\uff0e\u5219AC\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 145, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "792": { + "question_id": "792", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2624, + "img_width": 3936, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "794": { + "question_id": "794", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 4?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "796": { + "question_id": "796", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1938, + "img_width": 2516, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "798": { + "question_id": "798", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, l || m. Which of the following must equal 180?\nChoices:\n(A) k + n + r\n(B) k + p + s\n(C) n + p + s\n(D) n + p + t\n(E) r + s + t", + "choices": [ + "k + n + r", + "k + p + s", + "n + p + s", + "n + p + t", + "r + s + t" + ], + "answer": "k + p + s", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "k + n + r", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 372, + "img_width": 371, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "800": { + "question_id": "800", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid intersect Forest Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 596, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "802": { + "question_id": "802", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Karen bought 4 pounds of silk scraps and 4 pounds of canvas scraps. How much did she spend? (Unit: $)", + "choices": null, + "answer": "69", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 194, + "img_width": 243, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "804": { + "question_id": "804", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot B$, $CE=13.5$. Find $BD$. Round to the nearest hundredth.\nChoices:\n(A) 3.71\n(B) 4.29\n(C) 4.53\n(D) 6.75", + "choices": [ + "3.71", + "4.29", + "4.53", + "6.75" + ], + "answer": "4.29", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.71", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 524, + "img_width": 493, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "806": { + "question_id": "806", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and point C is on \u2299O. If \u2220A = 40.0, then the degree of \u2220B is ()\nChoices:\n(A) 80\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 40\u00b0", + "choices": [ + "80\u00b0", + "60\u00b0", + "50\u00b0", + "40\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "808": { + "question_id": "808", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large purple spheres. Subtract all small gray things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "810": { + "question_id": "810", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow metallic balls. Subtract all small yellow shiny things. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "812": { + "question_id": "812", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the gray bar always have smaller value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 1286, + "img_width": 840, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "814": { + "question_id": "814", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "100000000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "816": { + "question_id": "816", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth, if necessary.\nChoices:\n(A) 3\n(B) 9\n(C) 12.25\n(D) 24", + "choices": [ + "3", + "9", + "12.25", + "24" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 272, + "img_width": 379, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "818": { + "question_id": "818", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of least value of light brown graph and leftmost value of dark brown graph?", + "choices": null, + "answer": "0.32", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 434, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "820": { + "question_id": "820", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $a=14, b=48,$ and $c=50$ find $cosA$\nChoices:\n(A) 0.14\n(B) 0.48\n(C) 0.50\n(D) 0.96", + "choices": [ + "0.14", + "0.48", + "0.50", + "0.96" + ], + "answer": "0.96", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "822": { + "question_id": "822", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 22\n(B) 40\n(C) 44\n(D) 48", + "choices": [ + "22", + "40", + "44", + "48" + ], + "answer": "44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "22", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 356, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "824": { + "question_id": "824", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)", + "choices": null, + "answer": "0.13", + "extraction": "0.97", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "826": { + "question_id": "826", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the largest part of the lung?\nChoices:\n(A) Inferior lobes\n(B) Cardiac notch\n(C) Superior lobes\n(D) Middle lobe", + "choices": [ + "Inferior lobes", + "Cardiac notch", + "Superior lobes", + "Middle lobe" + ], + "answer": "Superior lobes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Inferior lobes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 479, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "828": { + "question_id": "828", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Linda wants to buy 0.9 pounds of double chocolate cookie dough. How much will she spend? (Unit: $)", + "choices": null, + "answer": "2.7", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 357, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "830": { + "question_id": "830", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 870, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "832": { + "question_id": "832", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "-2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "834": { + "question_id": "834", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Georgia , does Florida have the lowest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 610, + "img_width": 785, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "836": { + "question_id": "836", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest species shown?\nChoices:\n(A) chinlea\n(B) arganodus\n(C) semionotus\n(D) xenacanthus", + "choices": [ + "chinlea", + "arganodus", + "semionotus", + "xenacanthus" + ], + "answer": "semionotus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "chinlea", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1076, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "838": { + "question_id": "838", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1200, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "840": { + "question_id": "840", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which item can you get the most protein?\nChoices:\n(A) salami\n(B) wine\n(C) cheese\n(D) bread", + "choices": [ + "salami", + "wine", + "cheese", + "bread" + ], + "answer": "salami", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "salami", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 375, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "842": { + "question_id": "842", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $\\frac{2.0}{3.0}$hours to reach point B, then tan\u2220BAP = ()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{6}{5}\n(C) \\frac{\u221a{5}}{5}\n(D) \\frac{2\u221a{5}}{5}", + "choices": [ + "\\frac{4}{5}", + "\\frac{6}{5}", + "\\frac{\u221a{5}}{5}", + "\\frac{2\u221a{5}}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{5}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 115, + "img_width": 154, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "844": { + "question_id": "844", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the larger window shaped like the smaller window?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "846": { + "question_id": "846", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 758, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "848": { + "question_id": "848", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the tuberculosis treatment success rate in Bulgaria greater than the average tuberculosis treatment success rate in Bulgaria taken over all years ?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1091, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "850": { + "question_id": "850", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of cars in front of the tiny metal thing less than the number of large matte things in front of the cyan rubber road bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "852": { + "question_id": "852", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "40", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 598, + "img_width": 612, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "854": { + "question_id": "854", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the pelicans in the community were eradicated, which population feel the most direct effect?\nChoices:\n(A) Plant\n(B) Phyto-plankton\n(C) Fish\n(D) Lizard", + "choices": [ + "Plant", + "Phyto-plankton", + "Fish", + "Lizard" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "856": { + "question_id": "856", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture has the least leaves?\nChoices:\n(A) Both\n(B) Compound\n(C) Simple\n(D) Neither", + "choices": [ + "Both", + "Compound", + "Simple", + "Neither" + ], + "answer": "Simple", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Both", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "858": { + "question_id": "858", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the basis of the given food web, which organism will increase in number if there were no seals?\nChoices:\n(A) Shark\n(B) Small Shrimp\n(C) Octopus\n(D) Mysid Shrimp", + "choices": [ + "Shark", + "Small Shrimp", + "Octopus", + "Mysid Shrimp" + ], + "answer": "Octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Shark", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "860": { + "question_id": "860", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Miss Foley ran a sit-up competition among her P.E. students and monitored how many sit-ups each students could do. What is the largest number of sit-ups done? (Unit: sit-ups)", + "choices": null, + "answer": "86", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 246, + "img_width": 291, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "862": { + "question_id": "862", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: One of the most dramatic videos on the web (but entirely fictitious) supposedly shows a man sliding along a long water slide and then being launched into the air to land in a water pool. Let's attach some reasonable numbers to such a flight to calculate the velocity with which the man would have hit the water. Figure indicates the launch and landing sites and includes a superimposed coordinate system with its origin conveniently located at the launch site. From the video we take the horizontal flight distance as $D=20.0 \\mathrm{~m}$, the flight time as $t=2.50 \\mathrm{~s}$, and the launch angle as $\\theta_0=40.0^{\\circ}$. Find the magnitude of the velocity at launch and at landing.", + "choices": null, + "answer": "10.44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 600, + "img_width": 1302, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "864": { + "question_id": "864", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1738, + "img_width": 2480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "866": { + "question_id": "866", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For trapezoid $Q R S T, A$ and $B$ are midpoints of the legs. Find $m \\angle S$\nChoices:\n(A) 45\n(B) 60\n(C) 120\n(D) 135", + "choices": [ + "45", + "60", + "120", + "135" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 169, + "img_width": 359, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "868": { + "question_id": "868", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green cylinders. Subtract all rubber cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "870": { + "question_id": "870", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny motorbikes in front of the small cyan tandem bike than big cyan metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "872": { + "question_id": "872", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the next shape.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 496, + "img_width": 1472, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "874": { + "question_id": "874", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of y at x=-2.5?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "876": { + "question_id": "876", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, square $ABDC$ is inscribed in $\\odot K$. Find the measure of a central angle.\nChoices:\n(A) 45\n(B) 60\n(C) 90\n(D) 180", + "choices": [ + "45", + "60", + "90", + "180" + ], + "answer": "90", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 275, + "img_width": 273, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "878": { + "question_id": "878", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u9762\u79ef\u5206\u522b\u4e3aS1\uff0cS2\uff0cS3\uff0c\u4e14S1\uff1d5\uff0cS3\uff1d16\uff0c\u5219S2\uff1d\uff08\uff09\nChoices:\n(A) 6\n(B) 2\u221a{2}\n(C) 11\n(D) 24", + "choices": [ + "6", + "2\u221a{2}", + "11", + "24" + ], + "answer": "11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 94, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "880": { + "question_id": "880", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the total add up value of largest and smallest bar?", + "choices": null, + "answer": "252.65", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "882": { + "question_id": "882", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "884": { + "question_id": "884", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue kite in the lower right corner shaped like?\nChoices:\n(A) ferret\n(B) cat\n(C) cloud\n(D) octopus", + "choices": [ + "ferret", + "cat", + "cloud", + "octopus" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ferret", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "886": { + "question_id": "886", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A newspaper researched how many grocery stores there are in each town. What is the median of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "888": { + "question_id": "888", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green shiny balls. Subtract all small metallic things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "890": { + "question_id": "890", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is larger the moon or the sun?\nChoices:\n(A) Sun\n(B) It varies\n(C) They are equal in size\n(D) Moon", + "choices": [ + "Sun", + "It varies", + "They are equal in size", + "Moon" + ], + "answer": "Sun", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Sun", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 844, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "892": { + "question_id": "892", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does New Jersey have a higher value than Georgia ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "894": { + "question_id": "894", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms fat and acre?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "896": { + "question_id": "896", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately, what percentage of jewelry sales in January were Rings?\nChoices:\n(A) Around 21%\n(B) Around 27%\n(C) Around 31%\n(D) Around 37%", + "choices": [ + "Around 21%", + "Around 27%", + "Around 31%", + "Around 37%" + ], + "answer": "Around 31%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Around 21%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "elementary school", + "img_height": 464, + "img_width": 758, + "language": "english", + "skills": [ + "logical reasoning", + "statistical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "898": { + "question_id": "898", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, and C are the three points on \u2299O, if \u2220C = 35.0, then the degree of \u2220OAB is ()\nChoices:\n(A) 35\u00b0\n(B) 55\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "35\u00b0", + "55\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 109, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "900": { + "question_id": "900", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rubber cars less than the number of brown jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "902": { + "question_id": "902", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the leaf base has an angle greater than 90 degrees, what is it called?\nChoices:\n(A) obtuse\n(B) decurrent\n(C) cuneate\n(D) acute", + "choices": [ + "obtuse", + "decurrent", + "cuneate", + "acute" + ], + "answer": "obtuse", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "obtuse", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1429, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "904": { + "question_id": "904", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "906": { + "question_id": "906", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two value is greater then then largest value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "908": { + "question_id": "908", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which organism would most likely have a decrease in its population if decrease the population of ant base of above diagram?\nChoices:\n(A) plant\n(B) human\n(C) lizard\n(D) snake", + "choices": [ + "plant", + "human", + "lizard", + "snake" + ], + "answer": "lizard", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 497, + "img_width": 312, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "910": { + "question_id": "910", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal balls. Subtract all large matte things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "912": { + "question_id": "912", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 413, + "img_width": 629, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "914": { + "question_id": "914", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny purple shiny cubes. Subtract all large purple balls. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "916": { + "question_id": "916", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, \u2220A = 30.0, BC = 2.0, the radius of \u2299C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of \u2299C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()\nChoices:\n(A) 2\n(B) \u221a{3}\n(C) \u221a{2}\n(D) 2-\\frac{\u221a{3}}{3}", + "choices": [ + "2", + "\u221a{3}", + "\u221a{2}", + "2-\\frac{\u221a{3}}{3}" + ], + "answer": "\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 145, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "918": { + "question_id": "918", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "1", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 492, + "img_width": 538, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "920": { + "question_id": "920", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The measure of angle BAC equals x*\\degree. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 388, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "922": { + "question_id": "922", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual element in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "924": { + "question_id": "924", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Periwinkle have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "926": { + "question_id": "926", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the shaded area under the curve? Round the answer to 2 decimal places", + "choices": null, + "answer": "7.07", + "extraction": "0.43", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "928": { + "question_id": "928", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more does a navy blue bath mat cost than a yellow bath towel? (Unit: $)", + "choices": null, + "answer": "5", + "extraction": "17", + "prediction": "17", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 234, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "930": { + "question_id": "930", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cF\u662f\u25b3ABC\u7684\u89d2\u5e73\u5206\u7ebfCD\u548cBE\u7684\u4ea4\u70b9\uff0cCG\u22a5AB\u4e8e\u70b9G\uff0e\u82e5\u2220ACG\uff1d32\u00b0\uff0c\u5219\u2220BFC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 119\u00b0\n(B) 122\u00b0\n(C) 148\u00b0\n(D) 150\u00b0", + "choices": [ + "119\u00b0", + "122\u00b0", + "148\u00b0", + "150\u00b0" + ], + "answer": "119\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "119\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 113, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "932": { + "question_id": "932", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the phytoplankton if krill increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't be predicted\n(D) stay the same", + "choices": [ + "decrease", + "increase", + "can't be predicted", + "stay the same" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 350, + "img_width": 750, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "934": { + "question_id": "934", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "10000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "936": { + "question_id": "936", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 892, + "img_width": 710, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "938": { + "question_id": "938", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m \u22209 = 75$. Find the measure of $\\angle 6$.\nChoices:\n(A) 75\n(B) 85\n(C) 95\n(D) 105", + "choices": [ + "75", + "85", + "95", + "105" + ], + "answer": "105", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 278, + "img_width": 417, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "940": { + "question_id": "940", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red things. Subtract all metallic things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "942": { + "question_id": "942", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "944": { + "question_id": "944", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "946": { + "question_id": "946", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 373, + "img_width": 560, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "948": { + "question_id": "948", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some students compared how many blocks they live from school. What is the mean of the numbers?'", + "choices": null, + "answer": "11", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 207, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "950": { + "question_id": "950", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The slope of f(x) at x=0 is ____\nChoices:\n(A) positive\n(B) negative\n(C) zero\n(D) undefined", + "choices": [ + "positive", + "negative", + "zero", + "undefined" + ], + "answer": "positive", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "positive", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "952": { + "question_id": "952", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the food web below and on your knowledge of biology. A decrease in the Aquatic crustaceans population will most immediately decrease the available energy for the\nChoices:\n(A) Minnows\n(B) Ducks\n(C) Fish\n(D) Raccoons", + "choices": [ + "Minnows", + "Ducks", + "Fish", + "Raccoons" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Minnows", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "954": { + "question_id": "954", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A partial food web is shown below. Which of the following will most likely happen if the snake population decreases?\nChoices:\n(A) Cricket will increase\n(B) Mouse will increase\n(C) Rabbit will increase\n(D) All of above", + "choices": [ + "Cricket will increase", + "Mouse will increase", + "Rabbit will increase", + "All of above" + ], + "answer": "All of above", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Cricket will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 277, + "img_width": 475, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "956": { + "question_id": "956", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small blue rubber objects. Subtract all brown shiny balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "958": { + "question_id": "958", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the missing letters from below to form a word, using all letters presented\nChoices:\n(A) A, R, N\n(B) R, D, N\n(C) I, A, M\n(D) H, O, W", + "choices": [ + "A, R, N", + "R, D, N", + "I, A, M", + "H, O, W" + ], + "answer": "R, D, N", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "A, R, N", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 773, + "img_width": 945, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "960": { + "question_id": "960", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1365, + "img_width": 2048, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "962": { + "question_id": "962", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of y at x=10 is ____ that at x=70.\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 301, + "img_width": 387, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "964": { + "question_id": "964", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 70, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "966": { + "question_id": "966", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 166, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "968": { + "question_id": "968", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue balls. Subtract all big yellow rubber balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "970": { + "question_id": "970", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e24\u76f4\u7ebfa\uff0cb\u88ab\u76f4\u7ebfc\u6240\u622a\uff0c\u5df2\u77e5a\u2225b\uff0c\u22201\uff1d62\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 62\u00b0\n(B) 108\u00b0\n(C) 118\u00b0\n(D) 128\u00b0", + "choices": [ + "62\u00b0", + "108\u00b0", + "118\u00b0", + "128\u00b0" + ], + "answer": "118\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "62\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 135, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "972": { + "question_id": "972", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow shiny utility bikes greater than the number of brown metallic cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "974": { + "question_id": "974", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there the same number of big blue trucks and large purple metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "976": { + "question_id": "976", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal biplanes behind the purple shiny object less than the number of purple school buss behind the big red object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "978": { + "question_id": "978", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Allie kept a written log of how many miles she biked during the past 7 days. What is the range of the numbers?'", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "980": { + "question_id": "980", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number shown?", + "choices": null, + "answer": "12", + "extraction": "12", + "prediction": "12", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 429, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "982": { + "question_id": "982", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Wyoming , does South Dakota have the highest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "984": { + "question_id": "984", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray cars less than the number of small metallic minivans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "986": { + "question_id": "986", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAD\u662f\u89d2\u5e73\u5206\u7ebf\uff0cAE\u662f\u9ad8\uff0e\u82e5\u2220B\uff1d40\u00b0\uff0c\u2220C\uff1d70\u00b0\uff0c\u5219\u2220EAD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 10\u00b0\n(B) 15\u00b0\n(C) 17.5\u00b0\n(D) 20\u00b0", + "choices": [ + "10\u00b0", + "15\u00b0", + "17.5\u00b0", + "20\u00b0" + ], + "answer": "15\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 101, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "988": { + "question_id": "988", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "990": { + "question_id": "990", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot S$, $m \\widehat {PQR}=98$, Find $m \\widehat {PQ}$.\nChoices:\n(A) 45\n(B) 49\n(C) 90\n(D) 98", + "choices": [ + "45", + "49", + "90", + "98" + ], + "answer": "49", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 452, + "img_width": 544, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "992": { + "question_id": "992", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of purple metallic things that are behind the small green motorbike less than the number of blue metal articulated buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "994": { + "question_id": "994", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta greater than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 548, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "996": { + "question_id": "996", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big shiny balls. Subtract all blue rubber blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "998": { + "question_id": "998", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff1a\u2220AOB\uff1a\u2220BOC\uff1a\u2220COD\uff1d2\uff1a3\uff1a4\uff0c\u5c04\u7ebfOM\u3001ON\uff0c\u5206\u522b\u5e73\u5206\u2220AOB\u4e0e\u2220COD\uff0c\u53c8\u2220MON\uff1d84\u00b0\uff0c\u5219\u2220AOB\u4e3a\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 38\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "38\u00b0" + ], + "answer": "28\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 181, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "1000": { + "question_id": "1000", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte cylinders. Subtract all big purple matte things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + } +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json new file mode 100644 index 0000000000000000000000000000000000000000..c2f5cf778063fee4a585b6f4f289fadab09a4a50 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba84a4cbb0576f85800446579c3dcd431f37dd7139cdb6f7db6663823358fd2 +size 45272253 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json new file mode 100644 index 0000000000000000000000000000000000000000..8d0ceef7a5eb8d7e6e91b06542444600ccbe5e4f --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011e40f78ee9008a61b460145cf49396d68e89126511bce959ec55b38e3f0158 +size 94631375 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..779052834020af922e1f4e9b15b48f852b64067f --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f900a5b0df3eef25875d5833f0bc4b017bf4b6013ba00120ee1b519f1f2f05 +size 36750618 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json new file mode 100644 index 0000000000000000000000000000000000000000..64151e7b7c1f188f075e8399d0b60e63c535dd06 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0937b93e04b8698a8f641913eb0ae96114a2bdf93057c229af5a93cf653bd2 +size 60427356 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/results.json new file mode 100644 index 0000000000000000000000000000000000000000..360baf47970dcdfb67de093a17c86cdb33ec7ad3 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/results.json @@ -0,0 +1,285 @@ +{ + "results": { + "mathvista_testmini": { + "gpt_eval_score,none": 23.2, + "gpt_eval_score_stderr,none": "N/A", + "alias": "mathvista_testmini" + }, + "mme": { + "mme_cognition_score,none": 314.2857142857143, + "mme_cognition_score_stderr,none": "N/A", + "mme_percetion_score,none": 1367.7411964785915, + "mme_percetion_score_stderr,none": "N/A", + "alias": "mme" + }, + "mmmu_val": { + "mmmu_acc,none": 0.41333, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + }, + "mmstar": { + "coarse perception,none": 0.676844470255589, + "coarse perception_stderr,none": "N/A", + "fine-grained perception,none": 0.3550294767870302, + "fine-grained perception_stderr,none": "N/A", + "instance reasoning,none": 0.5366379757463484, + "instance reasoning_stderr,none": "N/A", + "logical reasoning,none": 0.3494995653411495, + "logical reasoning_stderr,none": "N/A", + "math,none": 0.31132495767620066, + "math_stderr,none": "N/A", + "science & technology,none": 0.2568647183257686, + "science & technology_stderr,none": "N/A", + "alias": "mmstar" + } + }, + "configs": { + "mathvista_testmini": { + "task": "mathvista_testmini", + "dataset_path": "AI4Math/MathVista", + "dataset_kwargs": { + "token": true + }, + "test_split": "testmini", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "gpt_eval_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "ASSISTANT:" + ], + "max_new_tokens": 1024, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "model_specific_prompt_kwargs": { + "default": { + "shot_type": "format-prompt", + "shot": 0, + "use_caption": false, + "use_ocr": false + }, + "phi3v": { + "shot_type": "solution" + } + }, + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mme": { + "task": "mme", + "dataset_path": "lmms-lab/MME", + "dataset_kwargs": { + "token": false + }, + "test_split": "test", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mme_percetion_score", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "mme_cognition_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 16, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "until": [ + "\n\n" + ] + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question using a single word or phrase." + }, + "gpt4v": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question with Yes or No." + }, + "qwen_vl": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "otterhd": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "xcomposer2_4khd": { + "pre_prompt": "[UNUSED_TOKEN_146]user\n", + "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n" + } + } + }, + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mmstar": { + "task": "mmstar", + "dataset_path": "Lin-Chen/MMStar", + "dataset_kwargs": { + "token": true + }, + "test_split": "val", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "coarse perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "fine-grained perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "instance reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "logical reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "science & technology", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "math", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "\n\n" + ], + "do_sample": false + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer with the option's letter from the given choices directly" + } + } + } + }, + "versions": { + "mathvista_testmini": "Yaml", + "mme": "Yaml", + "mmmu_val": "Yaml", + "mmstar": "Yaml" + }, + "n-shot": { + "mathvista_testmini": 0, + "mme": 0, + "mmmu_val": 0, + "mmstar": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-8318,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/submissions/mathvista_testmini_scores.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/submissions/mathvista_testmini_scores.json new file mode 100644 index 0000000000000000000000000000000000000000..30a328aa500ea880bb98bc1078e4655a0c49bf19 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/submissions/mathvista_testmini_scores.json @@ -0,0 +1,26873 @@ +{ + "1": { + "question_id": "1", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", + "choices": null, + "answer": "1.2", + "extraction": "0.1", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 720, + "img_width": 1514, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "3": { + "question_id": "3", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u25b3ABC\u7684\u4e24\u5185\u89d2\u5e73\u5206\u7ebfOB\u3001OC\u76f8\u4ea4\u4e8e\u70b9O\uff0c\u82e5\u2220A\uff1d110\u00b0\uff0c\u5219\u2220BOC\uff1d\uff08\uff09\nChoices:\n(A) 135\u00b0\n(B) 140\u00b0\n(C) 145\u00b0\n(D) 150\u00b0", + "choices": [ + "135\u00b0", + "140\u00b0", + "145\u00b0", + "150\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "135\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 60, + "img_width": 131, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "5": { + "question_id": "5", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m\\angle H$\nChoices:\n(A) 97\n(B) 102\n(C) 107\n(D) 122", + "choices": [ + "97", + "102", + "107", + "122" + ], + "answer": "97", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "97", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 245, + "img_width": 322, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "7": { + "question_id": "7", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "9": { + "question_id": "9", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u662f\u4e00\u682a\u7f8e\u4e3d\u7684\u52fe\u80a1\u6811\uff0c\u5176\u4e2d\u6240\u6709\u56db\u8fb9\u5f62\u90fd\u662f\u6b63\u65b9\u5f62\uff0c\u6240\u6709\u7684\u4e09\u89d2\u5f62\u90fd\u662f\u76f4\u89d2\u4e09\u89d2\u5f62\uff0c\u82e5\u6b63\u65b9\u5f62A\u3001B\u7684\u9762\u79ef\u5206\u522b\u4e3a5\u30013\uff0c\u5219\u6700\u5927\u6b63\u65b9\u5f62C\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 15\n(B) 13\n(C) 11\n(D) 8", + "choices": [ + "15", + "13", + "11", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 155, + "img_width": 134, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "11": { + "question_id": "11", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all tiny matte balls. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "13": { + "question_id": "13", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 90 percent of people in at least one category?", + "choices": null, + "answer": "0", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "15": { + "question_id": "15", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism with be most affected if algae was eliminated?\nChoices:\n(A) Tilapia\n(B) Common water flea\n(C) Great diving beetle\n(D) Tadpole", + "choices": [ + "Tilapia", + "Common water flea", + "Great diving beetle", + "Tadpole" + ], + "answer": "Common water flea", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Tilapia", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 232, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "17": { + "question_id": "17", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0cD\u662fAB\u7684\u4e2d\u70b9\uff0cAB\uff1d10\uff0c\u5219CD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10", + "choices": [ + "5", + "6", + "8", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 172, + "img_width": 125, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "19": { + "question_id": "19", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest amount this class measures?", + "choices": null, + "answer": "400", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 684, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "21": { + "question_id": "21", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "23": { + "question_id": "23", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=2 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "25": { + "question_id": "25", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the smoothest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 770, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "27": { + "question_id": "27", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "11", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1752, + "img_width": 2628, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "29": { + "question_id": "29", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 440, + "img_width": 670, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "31": { + "question_id": "31", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more big red rubber double buss in front of the large red double bus than big green things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "33": { + "question_id": "33", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()\nChoices:\n(A) 2\u03c0cm\n(B) 3\u03c0cm\n(C) 4\u03c0cm\n(D) 5\u03c0cm", + "choices": [ + "2\u03c0cm", + "3\u03c0cm", + "4\u03c0cm", + "5\u03c0cm" + ], + "answer": "4\u03c0cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2\u03c0cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 331, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "35": { + "question_id": "35", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cEF\uff0cEB\u662f\u2299O\u7684\u5f26\uff0c\u70b9E\u662fFEB\u7684\u4e2d\u70b9\uff0cEF\u4e0eAB\u4ea4\u4e8e\u70b9C\uff0c\u8fde\u63a5OF\uff0c\u82e5\u2220AOF\uff1d40\u00b0\uff0c\u5219\u2220F\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 55\u00b0", + "choices": [ + "20\u00b0", + "35\u00b0", + "40\u00b0", + "55\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "37": { + "question_id": "37", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit as x approaches -1?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 410, + "img_width": 408, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "39": { + "question_id": "39", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function odd or even?\nChoices:\n(A) odd\n(B) even", + "choices": [ + "odd", + "even" + ], + "answer": "odd", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "odd", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 304, + "img_width": 433, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "41": { + "question_id": "41", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 3491, + "img_width": 5236, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "43": { + "question_id": "43", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the wettest on average in Christchurch?\nChoices:\n(A) August\n(B) April\n(C) May", + "choices": [ + "August", + "April", + "May" + ], + "answer": "May", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "August", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 449, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "45": { + "question_id": "45", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An administrator at the Department of Motor Vehicles (DMV) tracked the average wait time from month to month. According to the table, what was the rate of change between August and September? (Unit: minutes per month)", + "choices": null, + "answer": "-3", + "extraction": "-1", + "prediction": "-1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "47": { + "question_id": "47", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all rubber balls. Subtract all yellow shiny things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "49": { + "question_id": "49", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the digits on either end of the sign in the corner?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 476, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "51": { + "question_id": "51", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber objects in front of the small yellow aeroplane greater than the number of big cyan matte fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "53": { + "question_id": "53", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 593, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "55": { + "question_id": "55", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e00\u5757\u76f4\u89d2\u4e09\u89d2\u677f60\u00b0\u7684\u89d2\u7684\u9876\u70b9A\u4e0e\u76f4\u89d2\u9876\u70b9C\u5206\u522b\u5728\u4e24\u5e73\u884c\u7ebfFG\uff0cDE\u4e0a\uff0c\u659c\u8fb9AB\u5e73\u5206\u2220CAG\uff0c\u4ea4\u76f4\u7ebfDE\u4e8e\u70b9H\uff0c\u5219\u2220BCH\u7684\u5927\u5c0f\u4e3a\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 45\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "60\u00b0", + "45\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "30\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 175, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "57": { + "question_id": "57", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small balls. Subtract all blue rubber things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "59": { + "question_id": "59", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, CD is the chord of \u2299O, \u2220ADC = 26.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 26\u00b0\n(B) 74\u00b0\n(C) 64\u00b0\n(D) 54\u00b0", + "choices": [ + "26\u00b0", + "74\u00b0", + "64\u00b0", + "54\u00b0" + ], + "answer": "64\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "61": { + "question_id": "61", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 427, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "63": { + "question_id": "63", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red matte cubes. Subtract all small green metal objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "65": { + "question_id": "65", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is f(3) > 0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "67": { + "question_id": "67", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the square?", + "choices": null, + "answer": "16", + "extraction": "16", + "prediction": "16", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "69": { + "question_id": "69", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big matte balls. Subtract all green rubber objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "71": { + "question_id": "71", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "18", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "73": { + "question_id": "73", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the matrix.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 654, + "img_width": 387, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "75": { + "question_id": "75", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "77": { + "question_id": "77", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year showed the largest difference in the data points between the two lines", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "79": { + "question_id": "79", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If \u2220ABC = 60.0, then the degree of \u2220CDE is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 104, + "img_width": 123, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "81": { + "question_id": "81", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of r at theta=3*pi/2?", + "choices": null, + "answer": "-1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 460, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "83": { + "question_id": "83", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shiny buss less than the number of matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "85": { + "question_id": "85", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many countries have people working for more than 35 hours over the years?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "87": { + "question_id": "87", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $790, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "89": { + "question_id": "89", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 384, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "91": { + "question_id": "91", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown suvs less than the number of brown rubber school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "93": { + "question_id": "93", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What's the computing and wirless total for semiconductor demand in 2014?", + "choices": null, + "answer": "197.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "95": { + "question_id": "95", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects \u2220AOE, \u2220BOC = 50.0, then \u2220EOB = ()\nChoices:\n(A) 50\u00b0\n(B) 60\u00b0\n(C) 70\u00b0\n(D) 80\u00b0", + "choices": [ + "50\u00b0", + "60\u00b0", + "70\u00b0", + "80\u00b0" + ], + "answer": "80\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 162, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "97": { + "question_id": "97", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 9?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "99": { + "question_id": "99", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cat is larger?\nChoices:\n(A) white five\n(B) white three\n(C) white four\n(D) white one\n(E) white two", + "choices": [ + "white five", + "white three", + "white four", + "white one", + "white two" + ], + "answer": "white one", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "white five", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "101": { + "question_id": "101", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which shape is most erect?\nChoices:\n(A) Lanceolate\n(B) Heart-shaped\n(C) Linear\n(D) Spatulate", + "choices": [ + "Lanceolate", + "Heart-shaped", + "Linear", + "Spatulate" + ], + "answer": "Linear", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lanceolate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1204, + "img_width": 376, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "103": { + "question_id": "103", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple matte blocks. Subtract all blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "105": { + "question_id": "105", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 727, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "107": { + "question_id": "107", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "109": { + "question_id": "109", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny balls. Subtract all green metallic things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "111": { + "question_id": "111", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big gray matte things. Subtract all small metallic cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "113": { + "question_id": "113", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many baseballs are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 458, + "img_width": 721, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "115": { + "question_id": "115", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1079, + "img_width": 826, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "117": { + "question_id": "117", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the range of this function?\nChoices:\n(A) [0, 2]\n(B) [3, 2]\n(C) [2, 4]\n(D) [-3, 4]", + "choices": [ + "[0, 2]", + "[3, 2]", + "[2, 4]", + "[-3, 4]" + ], + "answer": "[0, 2]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "[0, 2]", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 460, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "119": { + "question_id": "119", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, P is a point outside \u2299O, PA and PB intersect \u2299O at two points C and D respectively. It is known that the central angles of \u2040AB and \u2040CD are 90.0 and 50.0 respectively, then \u2220P = ()\nChoices:\n(A) 45\u00b0\n(B) 40\u00b0\n(C) 25\u00b0\n(D) 20\u00b0", + "choices": [ + "45\u00b0", + "40\u00b0", + "25\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 103, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "121": { + "question_id": "121", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In trying to calculate how much money could be saved by packing lunch, Manny recorded the amount he spent on lunch each day. According to the table, what was the rate of change between Wednesday and Thursday? (Unit: $, per day)", + "choices": null, + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "123": { + "question_id": "123", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram represents successive rotations, starting from the top down. Which shape comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 579, + "img_width": 412, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "125": { + "question_id": "125", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if caterpillars decrease?\nChoices:\n(A) plants decrease\n(B) plants increase\n(C) nothing happens\n(D) none of the above", + "choices": [ + "plants decrease", + "plants increase", + "nothing happens", + "none of the above" + ], + "answer": "plants increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plants decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "127": { + "question_id": "127", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "129": { + "question_id": "129", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "131": { + "question_id": "131", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have value below 40?", + "choices": null, + "answer": "3", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "133": { + "question_id": "133", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the merchandise exports greater than 0.92 %?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1268, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "135": { + "question_id": "135", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of buss that are in front of the big yellow aeroplane less than the number of matte bicycles that are on the right side of the tiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "137": { + "question_id": "137", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) injective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 258, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "139": { + "question_id": "139", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Indigo have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 543, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "141": { + "question_id": "141", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()\nChoices:\n(A) 3.85\u7c73\n(B) 4.00\u7c73\n(C) 4.40\u7c73\n(D) 4.50\u7c73", + "choices": [ + "3.85\u7c73", + "4.00\u7c73", + "4.40\u7c73", + "4.50\u7c73" + ], + "answer": "4.40\u7c73", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.85\u7c73", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 78, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "143": { + "question_id": "143", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, CE bisects \u2220BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 6", + "choices": [ + "1", + "2", + "3", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 204, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "145": { + "question_id": "145", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Can you find the missing term?", + "choices": null, + "answer": "10", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "147": { + "question_id": "147", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample B\n(C) sample A", + "choices": [ + "neither; the samples have the same temperature", + "sample B", + "sample A" + ], + "answer": "sample B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 563, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "149": { + "question_id": "149", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u22201\uff1d50\u00b0\uff0c\u22202\uff1d75\u00b0\uff0c\u5219\u22203\uff1d\uff08\uff09\nChoices:\n(A) 55\u00b0\n(B) 60\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "55\u00b0", + "60\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 93, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "151": { + "question_id": "151", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the function reach its local maximum?\nChoices:\n(A) (u1, u2) = (0, 0)\n(B) (u1, u2) = (1, 0)\n(C) (u1, u2) = (0, 1)\n(D) (u1, u2) = (1, 1)", + "choices": [ + "(u1, u2) = (0, 0)", + "(u1, u2) = (1, 0)", + "(u1, u2) = (0, 1)", + "(u1, u2) = (1, 1)" + ], + "answer": "(u1, u2) = (0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(u1, u2) = (0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 458, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "153": { + "question_id": "153", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be impacted by an increase in owls?\nChoices:\n(A) sun\n(B) grasshoppers\n(C) grass\n(D) mice", + "choices": [ + "sun", + "grasshoppers", + "grass", + "mice" + ], + "answer": "mice", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "sun", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "155": { + "question_id": "155", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Green have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 601, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "157": { + "question_id": "157", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9335", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "159": { + "question_id": "159", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "100", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1000, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "161": { + "question_id": "161", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the two numbers visible in the picture?", + "choices": null, + "answer": "71", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "163": { + "question_id": "163", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "7519", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "165": { + "question_id": "165", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan rubber cylinders. Subtract all tiny shiny cubes. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "167": { + "question_id": "167", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the biggest zero of this function?", + "choices": null, + "answer": "2", + "extraction": "-1", + "prediction": "-1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "169": { + "question_id": "169", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1049, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "171": { + "question_id": "171", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many cinnamon rolls are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 190, + "img_width": 467, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "173": { + "question_id": "173", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small rubber buss behind the big green road bike less than the number of suvs that are behind the large brown matte truck?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "175": { + "question_id": "175", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm liver for all the datasets?", + "choices": null, + "answer": "24", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "177": { + "question_id": "177", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown tandem bikes that are to the left of the small blue matte car greater than the number of tiny blue biplanes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "179": { + "question_id": "179", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u5df2\u77e5AC\uff1d4cm\uff0c\u82e5\u25b3ACD\u7684\u5468\u957f\u4e3a14cm\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 14cm\n(B) 28cm\n(C) 10cm\n(D) 20cm", + "choices": [ + "14cm", + "28cm", + "10cm", + "20cm" + ], + "answer": "20cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 157, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "181": { + "question_id": "181", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is correct?\nChoices:\n(A) A\n(B) B\n(C) C", + "choices": [ + "A", + "B", + "C" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 332, + "img_width": 864, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "183": { + "question_id": "183", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown cubes. Subtract all gray cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "185": { + "question_id": "185", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An image has the gray level PDF $p_r(r)$ shown in Fig. Q1a. One wants to do histogram specification SO that the processed image will have the specified $p_z(z)$ shown in Fig. Q1b. Can we use intensity mapping function $T: z=1-r$ to achieve the goal?\nChoices:\n(A) True\n(B) False", + "choices": [ + "True", + "False" + ], + "answer": "False", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "True", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 376, + "img_width": 724, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "187": { + "question_id": "187", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9015", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "189": { + "question_id": "189", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "191": { + "question_id": "191", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the volume of the air carriers in Ethiopia greater than the average volume of the air carriers in Ethiopia taken over all years ?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1116, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "193": { + "question_id": "193", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "195": { + "question_id": "195", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cC\uff0cD\u4e24\u70b9\u5728\u2299O\u4e0a\uff0c\u2220BCD\uff1d25\u00b0\uff0c\u5219\u2220AOD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 120\u00b0\n(B) 125\u00b0\n(C) 130\u00b0\n(D) 135\u00b0", + "choices": [ + "120\u00b0", + "125\u00b0", + "130\u00b0", + "135\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "120\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "197": { + "question_id": "197", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many sequences have negative Influence Scores?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 772, + "img_width": 1766, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "199": { + "question_id": "199", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure 23-42 is a section of a conducting rod of radius $R_1=1.30 \\mathrm{~mm}$ and length $L=$ $11.00 \\mathrm{~m}$ inside a thin-walled coaxial conducting cylindrical shell of radius $R_2=10.0 R_1$ and the (same) length $L$. The net charge on the rod is $Q_1=+3.40 \\times 10^{-12} \\mathrm{C}$; that on the shell is $Q_2=-2.00 Q_1$. What is the magnitude $E$ of the electric field at radial distance $r=2.00 R_2$?", + "choices": null, + "answer": "0.21", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 303, + "img_width": 262, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "201": { + "question_id": "201", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the border group?", + "choices": null, + "answer": "19", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "203": { + "question_id": "203", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57285\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u90fd\u662f1\uff0c\u25b3ABC\u7684\u9876\u70b9\u90fd\u5728\u8fd9\u4e9b\u5c0f\u6b63\u65b9\u5f62\u7684\u9876\u70b9\u4e0a\uff0c\u5219tan\u2220BAC\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8", + "choices": [ + "\\frac{4}{3}", + "0.75", + "0.6", + "0.8" + ], + "answer": "\\frac{4}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 151, + "img_width": 172, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "205": { + "question_id": "205", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A statistician analyzed the number of runs scored by players last season. How many players scored more than 2 runs last season?'", + "choices": null, + "answer": "24", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "207": { + "question_id": "207", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms magic and secure?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "209": { + "question_id": "209", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the highest value in black line chart ?", + "choices": null, + "answer": "28.3", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "211": { + "question_id": "211", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 2?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "213": { + "question_id": "213", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year there was lowest per capita real gross domestic product of ohio?", + "choices": null, + "answer": "2001", + "extraction": "1999", + "prediction": "1999", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "215": { + "question_id": "215", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Layla went on a camping trip and logged the number of miles she hiked each day. What is the range of the numbers?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 249, + "img_width": 212, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "217": { + "question_id": "217", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 202, + "img_width": 304, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "219": { + "question_id": "219", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "221": { + "question_id": "221", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, C are three points on \u2299O, \u2220ACB = 25.0, then the degree of \u2220BAO is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "223": { + "question_id": "223", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an even function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "225": { + "question_id": "225", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Fig. Q4 shows the contour of an object. Represent it with an 8-directional chain code. The resultant chain code should be normalized with respect to the starting point of the chain code. Represent the answer as a list with each digit as a element.", + "choices": null, + "answer": "[0, 2, 0, 2, 1, 7, 1, 2, 0, 3, 0, 6]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 560, + "img_width": 846, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "227": { + "question_id": "227", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 580, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "229": { + "question_id": "229", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest lysine level given?\nChoices:\n(A) 0.33%\n(B) 0.31%\n(C) 0.29%\n(D) 0.32%\n(E) 0.30%", + "choices": [ + "0.33%", + "0.31%", + "0.29%", + "0.32%", + "0.30%" + ], + "answer": "0.30%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.33%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2185, + "img_width": 1683, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "231": { + "question_id": "231", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the overall best ImageNet 10shot Accuracy score across different training steps?\nChoices:\n(A) Identity\n(B) Uniform\n(C) Uniform / Soft\n(D) Soft / Uniform\n(E) Soft\n(F) Dense", + "choices": [ + "Identity", + "Uniform", + "Uniform / Soft", + "Soft / Uniform", + "Soft", + "Dense" + ], + "answer": "Soft", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Identity", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 988, + "img_width": 2002, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "233": { + "question_id": "233", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "235": { + "question_id": "235", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the epigraph of a function f an infinite set?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 266, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "237": { + "question_id": "237", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Red squirrel and deer mouse population were to decrease, what would happen to the deer tick population?\nChoices:\n(A) increase\n(B) fluctuate\n(C) it would decrease\n(D) stay the same", + "choices": [ + "increase", + "fluctuate", + "it would decrease", + "stay the same " + ], + "answer": "it would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 346, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "239": { + "question_id": "239", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 769, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "241": { + "question_id": "241", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the table have a model size larger than 10B?", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1184, + "img_width": 1570, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "243": { + "question_id": "243", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0686\u0646\u062f \u0639\u062f\u062f \u0634\u06cc\u0631\u06cc\u0646\u06cc \u0645\u062b\u0644\u062b\u06cc \u0634\u06a9\u0644 \u062f\u0631 \u062c\u0639\u0628\u0647 \u0627\u0633\u062a\u061f", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1001, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "245": { + "question_id": "245", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Damon need to buy a grilled steak and a mushroom pizza? (Unit: $)", + "choices": null, + "answer": "24", + "extraction": "22", + "prediction": "22", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 259, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "247": { + "question_id": "247", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: A spaceship of mass $m=4.50 \\times 10^3 \\mathrm{~kg}$ is in a circular Earth orbit of radius $r=8.00 \\times 10^6 \\mathrm{~m}$ and period $T_0=118.6 \\mathrm{~min}=$ $7.119 \\times 10^3 \\mathrm{~s}$ when a thruster is fired in the forward direction to decrease the speed to $96.0 \\%$ of the original speed. What is the period $T$ of the resulting elliptical orbit (Figure)?", + "choices": null, + "answer": "6.36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 906, + "img_width": 914, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "249": { + "question_id": "249", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green rubber cubes. Subtract all red matte blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "251": { + "question_id": "251", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green balls. Subtract all shiny things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "253": { + "question_id": "253", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "255": { + "question_id": "255", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2220BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of \u2220PAQ is ()\nChoices:\n(A) 70\u00b0\n(B) 55\u00b0\n(C) 40\u00b0\n(D) 30\u00b0", + "choices": [ + "70\u00b0", + "55\u00b0", + "40\u00b0", + "30\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "70\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 188, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "257": { + "question_id": "257", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u4ee5\u76f4\u89d2\u4e09\u89d2\u5f62\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u4e2d\u4e24\u4e2a\u6b63\u65b9\u5f62\u7684\u9762\u79ef\u5982\u56fe\u6240\u793a\uff0c\u5219\u6b63\u65b9\u5f62A\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 6\n(B) 36\n(C) 64\n(D) 8", + "choices": [ + "6", + "36", + "64", + "8" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "259": { + "question_id": "259", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow metal blocks. Subtract all gray metallic cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "261": { + "question_id": "261", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 345, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "263": { + "question_id": "263", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "38", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 113, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "265": { + "question_id": "265", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Justine's P.E. class participated in a push-up competition, and Justine wrote down how many push-ups each person could do. How many people did at least 60 push-ups? (Unit: people)", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 329, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "267": { + "question_id": "267", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape of a leaf is similar to Serrate, but has smaller, evenly-spaced teeth?\nChoices:\n(A) Undulate\n(B) Sinuate\n(C) Serrulate\n(D) Entire", + "choices": [ + "Undulate", + "Sinuate", + "Serrulate", + "Entire" + ], + "answer": "Serrulate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Undulate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 306, + "img_width": 529, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "269": { + "question_id": "269", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()\nChoices:\n(A) 160m\n(B) 160\u221a{3}m\n(C) (160-160\u221a{3})m\n(D) 360m", + "choices": [ + "160m", + "160\u221a{3}m", + "(160-160\u221a{3})m", + "360m" + ], + "answer": "160\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "160m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 159, + "img_width": 133, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "271": { + "question_id": "271", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y\nChoices:\n(A) 3\n(B) 4.5\n(C) 5\n(D) 6", + "choices": [ + "3", + "4.5", + "5", + "6" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 448, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "273": { + "question_id": "273", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One diagonal of a rhombus is twice as long as the other diagonal. If the area of the rhombus is 169 square millimeters, what are the lengths of the diagonals?\nChoices:\n(A) 6.5\n(B) 13\n(C) 26\n(D) 52", + "choices": [ + "6.5", + "13", + "26", + "52" + ], + "answer": "26", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "275": { + "question_id": "275", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, AD \u22a5 BC at D, DE \u22a5 AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()\nChoices:\n(A) \\frac{21}{2}\n(B) \\frac{\u221a{15}}{2}\n(C) \\frac{9}{2}\n(D) \\frac{3\u221a{5}}{2}", + "choices": [ + "\\frac{21}{2}", + "\\frac{\u221a{15}}{2}", + "\\frac{9}{2}", + "\\frac{3\u221a{5}}{2}" + ], + "answer": "\\frac{3\u221a{5}}{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{21}{2}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 185, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "277": { + "question_id": "277", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cube is identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 591, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "279": { + "question_id": "279", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be directly affected by a decrease in sunlight?\nChoices:\n(A) grass\n(B) mouse\n(C) grasshopper\n(D) owl", + "choices": [ + "grass", + "mouse", + "grasshopper", + "owl" + ], + "answer": "grass", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grass", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "281": { + "question_id": "281", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Was this a square pizza?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "283": { + "question_id": "283", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{WTY} \\cong \\overline{TWY}$. Find $x$.\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 10", + "choices": [ + "2", + "4", + "5", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 416, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "285": { + "question_id": "285", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that AB is the diameter of \u2299O, if the degree of \u2220BOC is 50.0, then the degree of \u2220A is ()\nChoices:\n(A) 50\u00b0\n(B) 40\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "50\u00b0", + "40\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "287": { + "question_id": "287", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is larger? R1 or R2?\nA. R1\nB. R2\nChoices:\n(A) R1\n(B) R2\n(C) R5\n(D) R3\n(E) R4", + "choices": [ + "R1", + "R2", + "R5", + "R3", + "R4" + ], + "answer": "R2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "R1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 370, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "289": { + "question_id": "289", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "291": { + "question_id": "291", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which period the number of full time employees is the maximum?\nChoices:\n(A) Jul '21\n(B) Jun '21\n(C) Mar '21\n(D) May '21\n(E) Apr '21", + "choices": [ + "Jul '21", + "Jun '21", + "Mar '21", + "May '21", + "Apr '21" + ], + "answer": "May '21", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Jul '21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "293": { + "question_id": "293", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, grasshopper population increase if\nChoices:\n(A) grouse decrease\n(B) chipmunk increases\n(C) grasses increases\n(D) elk increase", + "choices": [ + "grouse decrease", + "chipmunk increases", + "grasses increases", + "elk increase" + ], + "answer": "grasses increases", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grouse decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 156, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "295": { + "question_id": "295", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "297": { + "question_id": "297", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green buss greater than the number of blue school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "299": { + "question_id": "299", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1067, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "301": { + "question_id": "301", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model performs the best overall across the three stages in terms of Messenger training performance?\nChoices:\n(A) Dynalang\n(B) EMMA\n(C) R2D2\n(D) IMPALA", + "choices": [ + "Dynalang", + "EMMA", + "R2D2", + "IMPALA" + ], + "answer": "Dynalang", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Dynalang", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 524, + "img_width": 2012, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "303": { + "question_id": "303", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Dim Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 797, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "305": { + "question_id": "305", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "307": { + "question_id": "307", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure is an overhead view of the path taken by a race car driver as his car collides with the racetrack wall. Just before the collision, he is traveling at speed $v_i=70 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $30^{\\circ}$ from the wall. Just after the collision, he is traveling at speed $v_f=50 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $10^{\\circ}$ from the wall. His mass $m$ is $80 \\mathrm{~kg}$. The collision lasts for $14 \\mathrm{~ms}$. What is the magnitude of the average force on the driver during the collision?", + "choices": null, + "answer": "2.58", + "extraction": "1000.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 466, + "img_width": 772, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "309": { + "question_id": "309", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The movie critic liked to count the number of actors in each movie he saw. How many movies had at least 30 actors but fewer than 47 actors? (Unit: movies)", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "311": { + "question_id": "311", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1947, + "img_width": 1620, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "313": { + "question_id": "313", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 334, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "315": { + "question_id": "315", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, angle A is congruent to angle BED, and angle C is congruent to angle D. If the ratio of the length of AB to the length of EB is 5:1, and the area of the triangle BED is 5*a^2 + 10, what is the area of triangle ABC?\nChoices:\n(A) 5*a^2 + 10\n(B) 25*a^2 + 50\n(C) 25*a^2 + 100\n(D) 125*a^2 + 250\n(E) cannot be determined", + "choices": [ + "5*a^2 + 10", + "25*a^2 + 50", + "25*a^2 + 100", + "125*a^2 + 250", + "cannot be determined" + ], + "answer": "125*a^2 + 250", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5*a^2 + 10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 463, + "img_width": 749, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "317": { + "question_id": "317", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 361, + "img_width": 496, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "319": { + "question_id": "319", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Would most of the ground cover be considered weeds?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "321": { + "question_id": "321", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $330, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "323": { + "question_id": "323", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Craig just downloaded the new game Gem Excavator on his phone. In the first level, Craig gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Craig's points. Which color gem affects Craig's points less?'\nChoices:\n(A) green\n(B) red", + "choices": [ + "green", + "red" + ], + "answer": "green", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 94, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "325": { + "question_id": "325", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "327": { + "question_id": "327", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 1 units in at least one store?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "329": { + "question_id": "329", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of y at x=6 is ____ that at x=8\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "larger than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "331": { + "question_id": "331", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Several people compared how many Web pages they had visited. What is the mean of the numbers?'", + "choices": null, + "answer": "64", + "extraction": "56", + "prediction": "56", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 246, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "333": { + "question_id": "333", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find tan X\nChoices:\n(A) \\frac { 5 } { 12 }\n(B) \\frac { 12 } { 13 }\n(C) \\frac { 17 } { 12 }\n(D) \\frac { 12 } { 5 }", + "choices": [ + "\\frac { 5 } { 12 }", + "\\frac { 12 } { 13 }", + "\\frac { 17 } { 12 }", + "\\frac { 12 } { 5 }" + ], + "answer": "\\frac { 5 } { 12 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 5 } { 12 }", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 297, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "335": { + "question_id": "335", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large brown matte balls. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "337": { + "question_id": "337", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "339": { + "question_id": "339", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u2299O\u4e2d\uff0cAB=AC\uff0c\u2220BAC\uff1d70\u00b0\uff0c\u5219\u2220AEC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 75\u00b0\n(C) 50\u00b0\n(D) 55\u00b0", + "choices": [ + "65\u00b0", + "75\u00b0", + "50\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 115, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "341": { + "question_id": "341", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is six (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "343": { + "question_id": "343", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple metallic spheres. Subtract all small purple things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "345": { + "question_id": "345", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many kites are there?", + "choices": null, + "answer": "25", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 429, + "img_width": 711, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "347": { + "question_id": "347", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green metallic double buss less than the number of big purple rubber cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "349": { + "question_id": "349", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which capability boasts the highest proportion (%)?\nChoices:\n(A) Rec\n(B) OCR\n(C) Know\n(D) Gen\n(E) Spat\n(F) Math", + "choices": [ + "Rec", + "OCR", + "Know", + "Gen", + "Spat", + "Math" + ], + "answer": "Rec", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rec", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 1348, + "img_width": 1704, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "351": { + "question_id": "351", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer purple rubber objects that are to the left of the red object than tiny matte bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "353": { + "question_id": "353", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: At time $t=0$ a tank contains $Q_0 \\mathrm{lb}$ of salt dissolved in 100 gal of water; see Figure 2.3.1. Assume that water containing $\\frac{1}{4} \\mathrm{lb}$ of salt/gal is entering the tank at a rate of $r \\mathrm{gal} / \\mathrm{min}$ and that the well-stirred mixture is draining from the tank at the same rate. Set up the initial value problem that describes this flow process. By finding the amount of salt $Q(t)$ in the tank at any time, and the limiting amount $Q_L$ that is present after a very long time, if $r=3$ and $Q_0=2 Q_L$, find the time $T$ after which the salt level is within $2 \\%$ of $Q_L$.", + "choices": null, + "answer": "130.4", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 938, + "img_width": 996, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "355": { + "question_id": "355", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If \u22201 = 50.0, then the degree of \u22202 is ()\nChoices:\n(A) 150\u00b0\n(B) 130\u00b0\n(C) 110\u00b0\n(D) 100\u00b0", + "choices": [ + "150\u00b0", + "130\u00b0", + "110\u00b0", + "100\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "150\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "357": { + "question_id": "357", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "359": { + "question_id": "359", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kylie spent a week at the beach and recorded the number of shells she found each day. According to the table, what was the rate of change between Thursday and Friday? (Unit: shells per day)", + "choices": null, + "answer": "-7", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "361": { + "question_id": "361", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mold are the cylindrical ports located? \nChoices:\n(A) Upper half\n(B) Lower half\n(C) Medial half\n(D) Lateral half", + "choices": [ + "Upper half", + "Lower half", + "Medial half", + "Lateral half" + ], + "answer": "Lower half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Upper half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 435, + "img_width": 596, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "PMC-VQA", + "split": "testmini", + "task": "visual question answering" + }, + "363": { + "question_id": "363", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny gray metal blocks. Subtract all purple things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "365": { + "question_id": "365", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big yellow metallic spheres. Subtract all tiny metal things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "367": { + "question_id": "367", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "14", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 429, + "img_width": 873, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "369": { + "question_id": "369", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) surjective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 266, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "371": { + "question_id": "371", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ABC\uff1d90\u00b0\uff0c\u70b9D\u3001E\u3001F\u5206\u522b\u662f\u8fb9AB\u3001BC\u3001CA\u7684\u4e2d\u70b9\uff0c\u82e5DE+BF\uff1d8\uff0c\u5219BF\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "373": { + "question_id": "373", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the quadrilateral ABCD, \u2220BAD = 120.0, \u2220B = \u2220D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of \u25b3AMN is the smallest, then the degree of \u2220AMN + \u2220ANM is ()\nChoices:\n(A) 110\u00b0\n(B) 120\u00b0\n(C) 140\u00b0\n(D) 150\u00b0", + "choices": [ + "110\u00b0", + "120\u00b0", + "140\u00b0", + "150\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "110\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 122, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "375": { + "question_id": "375", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the length of $AC$ in the isosceles triangle ABC. \nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5", + "choices": [ + "1.5", + "7", + "11", + "12.5" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 293, + "img_width": 703, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "377": { + "question_id": "377", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 649, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "379": { + "question_id": "379", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown what will most directly be affected by the loss of the trees?\nChoices:\n(A) horses\n(B) cats\n(C) nothing\n(D) bears", + "choices": [ + "horses", + "cats", + "nothing", + "bears" + ], + "answer": "horses", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "horses", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 400, + "img_width": 570, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "381": { + "question_id": "381", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny cyan matte articulated buss left of the big school bus than small yellow matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "383": { + "question_id": "383", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What value you get , if you divide the largest bar value by 2 ?", + "choices": null, + "answer": "131253.5", + "extraction": "12.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "385": { + "question_id": "385", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 771, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "387": { + "question_id": "387", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Of the four balls in the photo, what is the percentage of them on the ground?", + "choices": null, + "answer": "100", + "extraction": "75", + "prediction": "75", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 485, + "img_width": 363, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "389": { + "question_id": "389", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $320, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "shortage", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "391": { + "question_id": "391", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, point O is the center of \u2299O, points A, B, and C are on \u2299O, AO \u2225 BC, \u2220AOB = 40.0, then the degree of \u2220OAC is equal to ()\nChoices:\n(A) 40\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "60\u00b0", + "50\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 96, + "img_width": 96, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "393": { + "question_id": "393", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest dark blue bar?", + "choices": null, + "answer": "54", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "395": { + "question_id": "395", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average age of the people in this picture?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "397": { + "question_id": "397", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001B\u3001C\u90fd\u5728\u534a\u5f84\u4e3a2\u7684\u2299O\u4e0a\uff0c\u2220C\uff1d30\u00b0\uff0c\u5219\u5f26AB\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.2\n(D) 2.5", + "choices": [ + "1", + "2", + "2.2", + "2.5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 70, + "img_width": 73, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "399": { + "question_id": "399", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "401": { + "question_id": "401", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "403": { + "question_id": "403", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find TX if $E X=24$ and $D E=7$\nChoices:\n(A) 7\n(B) 24\n(C) 25\n(D) 32", + "choices": [ + "7", + "24", + "25", + "32" + ], + "answer": "32", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 221, + "img_width": 564, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "405": { + "question_id": "405", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "19", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1351, + "img_width": 1801, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "407": { + "question_id": "407", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9B\uff0cD\uff0cE\uff0cC\u5728\u540c\u4e00\u6761\u76f4\u7ebf\u4e0a\uff0c\u82e5\u25b3ABD\u224c\u25b3ACE\uff0c\u2220AEC\uff1d110\u00b0\uff0c\u5219\u2220DAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 67, + "img_width": 76, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "409": { + "question_id": "409", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the radius of this circle?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 358, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "411": { + "question_id": "411", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average percentage of population having access to electricity per year?", + "choices": null, + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1081, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "413": { + "question_id": "413", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5df2\u77e5\uff1a\u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAB\uff1dAC\uff0cBD\u4e3a\u2220ABC\u7684\u5e73\u5206\u7ebf\uff0c\u2220BDC\uff1d75\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 123, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "415": { + "question_id": "415", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average annual wage in Slovak Republic in the year 2019", + "choices": null, + "answer": "15017", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "417": { + "question_id": "417", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 748, + "img_width": 564, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "419": { + "question_id": "419", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "421": { + "question_id": "421", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An elevator cab of mass $m=500 \\mathrm{~kg}$ is descending with speed $v_i=4.0 \\mathrm{~m} / \\mathrm{s}$ when its supporting cable begins to slip, allowing it to fall with constant acceleration $\\vec{a}=\\vec{g} / 5$.\r\nDuring the $12 \\mathrm{~m}$ fall, what is the work $W_T$ done on the cab by the upward pull $\\vec{T}$ of the elevator cable?", + "choices": null, + "answer": "-47", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1190, + "img_width": 550, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "423": { + "question_id": "423", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink less than Dark Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 577, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "425": { + "question_id": "425", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5728Rt\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5AC\uff1d6\uff0cBC\uff1d8\uff0c\u5219cosA\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 0.6\n(B) 0.8\n(C) 0.75\n(D) \\frac{4}{3}", + "choices": [ + "0.6", + "0.8", + "0.75", + "\\frac{4}{3}" + ], + "answer": "0.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 171, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "427": { + "question_id": "427", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "429": { + "question_id": "429", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "431": { + "question_id": "431", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what would happen to dragonfly if all mayfly dies\nChoices:\n(A) remains the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remains the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 297, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "433": { + "question_id": "433", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 350, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "435": { + "question_id": "435", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of employed females who are not attending school greater than the average percentage of employed females who are not attending school taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 955, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "437": { + "question_id": "437", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fig.Q3 shows an excerpt of the transmission phase of a TCP connection. Assume the length of the IP header is 20 bytes. What is the ACK number at message 6?", + "choices": null, + "answer": "839", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 814, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "439": { + "question_id": "439", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is this function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 256, + "img_width": 539, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "441": { + "question_id": "441", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "443": { + "question_id": "443", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure: In Rt\u25b3ABC, \u2220C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}", + "choices": [ + "\\frac{3}{5}", + "\\frac{4}{5}", + "\\frac{3}{4}", + "\\frac{4}{3}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{3}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 80, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "445": { + "question_id": "445", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate less than Saddle Brown?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 436, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "447": { + "question_id": "447", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Midnight Blue intersect Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 685, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "449": { + "question_id": "449", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do the average motorcycle get on the highway?", + "choices": null, + "answer": "40", + "extraction": "50", + "prediction": "50", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "451": { + "question_id": "451", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow metallic choppers that are behind the large cyan thing less than the number of brown metal double buss that are behind the small yellow shiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "453": { + "question_id": "453", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 116, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "455": { + "question_id": "455", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If x = 32 and r = 18, what is the length of the arc shown in the figure above?\nChoices:\n(A) 16*\\pi/5\n(B) 32*\\pi/5\n(C) 36*\\pi\n(D) 288*\\pi/5\n(E) 576*\\pi", + "choices": [ + "16*\\pi/5", + "32*\\pi/5", + "36*\\pi", + "288*\\pi/5", + "576*\\pi" + ], + "answer": "16*\\pi/5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16*\\pi/5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 575, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "457": { + "question_id": "457", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "4525", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 605, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "459": { + "question_id": "459", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large cyan matte balls. Subtract all tiny shiny objects. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "461": { + "question_id": "461", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A perceptual audio codec is used to compress an audio signal. The codec groups every 4 barks into a subband and then allocates bits to different subbands according to the result of a spectrum analysis based on a psychoacoustic model. All samples in the same subband are quantized with the same quantizer, and the bit resolution of which is allocated by the codec. (The Bark scale is a psychoacoustical scale proposed by Eberhard Zwicker in 1961.) Fig. Q1a shows the frequency spectrum of a windowed segment of audio signal. The psychoacoustic model shown in Fig. Q1b is used in the audio codec to derive the masking threshold for the audio segment. How many potential maskers in Fig. Q1a?", + "choices": null, + "answer": "7", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 488, + "img_width": 908, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "463": { + "question_id": "463", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray things. Subtract all small brown metallic balls. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "465": { + "question_id": "465", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 628, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "467": { + "question_id": "467", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The degree measures of minor arc $\\widehat{A C}$ and major arc $\\widehat{A D C}$ are $x$ and $y$ respectively. If $m\u2220ABC = 70\u00b0$, find $x$.\nChoices:\n(A) 90\n(B) 100\n(C) 110\n(D) 120", + "choices": [ + "90", + "100", + "110", + "120" + ], + "answer": "110", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "90", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 235, + "img_width": 499, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "469": { + "question_id": "469", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Chartreuse?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "471": { + "question_id": "471", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Lily and her friends recorded their scores while playing a board game. Which score did the greatest number of people receive?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "473": { + "question_id": "473", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2604, + "img_width": 2500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "475": { + "question_id": "475", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 71, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "477": { + "question_id": "477", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "479": { + "question_id": "479", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How many times Norway data bigger than Italy data ?", + "choices": null, + "answer": "2.54", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "481": { + "question_id": "481", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 404, + "img_width": 592, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "483": { + "question_id": "483", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is on \u2299O, AE is the tangent of \u2299O, A is the tangent point, connect BC and extend to intersect AE at point D. If \u2220AOC = 80.0, then the degree of \u2220ADB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "20\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 165, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "485": { + "question_id": "485", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9D\u5728\u7b49\u8fb9\u25b3ABC\u7684\u8fb9CB\u7684\u5ef6\u957f\u7ebf\u4e0a\uff0c\u70b9E\u5728\u7ebf\u6bb5BC\u4e0a\uff0c\u8fde\u63a5AD\uff0cAE\uff0c\u82e5DA\uff1dDE\uff0c\u4e14\u2220DAB\uff1d20\u00b0\uff0c\u90a3\u4e48\u2220EAC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 15\u00b0\n(C) 10\u00b0\n(D) 5\u00b0", + "choices": [ + "20\u00b0", + "15\u00b0", + "10\u00b0", + "5\u00b0" + ], + "answer": "10\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 235, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "487": { + "question_id": "487", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big cars behind the small brown shiny mountain bike than tiny objects on the right side of the bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "489": { + "question_id": "489", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For trapezoid ABCD shown above, AB = 24, AD = 23, and BC = 16. What is the length of segment CD?", + "choices": null, + "answer": "25", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 297, + "img_width": 426, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "491": { + "question_id": "491", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 540, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "493": { + "question_id": "493", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function differentiable at every point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 847, + "img_width": 800, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "495": { + "question_id": "495", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer green things in front of the blue metallic car than choppers right of the chopper?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "497": { + "question_id": "497", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "499": { + "question_id": "499", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Quadrilateral $ABDC$ is a rectangle. If $m\\angle1 = 38$, find $m \\angle 2$\nChoices:\n(A) 33\n(B) 38\n(C) 52\n(D) 87", + "choices": [ + "33", + "38", + "52", + "87" + ], + "answer": "52", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 323, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "501": { + "question_id": "501", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red rubber cylinders. Subtract all blue objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "503": { + "question_id": "503", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the center person? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 225, + "img_width": 338, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "505": { + "question_id": "505", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "507": { + "question_id": "507", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen if the hawk population increased?\nChoices:\n(A) mice would increase\n(B) sparrows increased\n(C) garter snakes would decrease\n(D) grass decreased", + "choices": [ + "mice would increase", + "sparrows increased", + "garter snakes would decrease", + "grass decreased" + ], + "answer": "garter snakes would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mice would increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "509": { + "question_id": "509", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 400, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "511": { + "question_id": "511", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "513": { + "question_id": "513", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest value in states that border West Virginia ?\nChoices:\n(A) 43.2%-63.6%\n(B) 45.2%-65.6%\n(C) 42.2%-62.6%\n(D) 41.2%-61.6%\n(E) 44.2%-64.6%", + "choices": [ + "43.2%-63.6%", + "45.2%-65.6%", + "42.2%-62.6%", + "41.2%-61.6%", + "44.2%-64.6%" + ], + "answer": "42.2%-62.6%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "43.2%-63.6%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "515": { + "question_id": "515", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You would potentially see a decrease in which organism if gulls disappeared?\nChoices:\n(A) herring\n(B) kril\n(C) anchovy\n(D) phytoplankton", + "choices": [ + "herring", + "kril", + "anchovy", + "phytoplankton" + ], + "answer": "kril", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "herring", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 549, + "img_width": 398, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "517": { + "question_id": "517", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: At Bloomington Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2014 and 2015? (Unit: employees per year)", + "choices": null, + "answer": "-1", + "extraction": "-1", + "prediction": "-1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 275, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "519": { + "question_id": "519", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many Triangles do you see in the picture?", + "choices": null, + "answer": "12", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 852, + "img_width": 948, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "521": { + "question_id": "521", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is a point on \u2299O, \u2220C = 20.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 20\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "20\u00b0", + "30\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 120, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "523": { + "question_id": "523", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, a teaching interest group wants to measure the height of a tree CD. They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()\nChoices:\n(A) 10m\n(B) 5m\n(C) 5\u221a{3}m\n(D) 10\u221a{3}m", + "choices": [ + "10m", + "5m", + "5\u221a{3}m", + "10\u221a{3}m" + ], + "answer": "5\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 285, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "525": { + "question_id": "525", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value shown on the X axis of first plot?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2209, + "img_width": 1711, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "527": { + "question_id": "527", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big shiny cars in front of the red airliner greater than the number of big purple road bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "529": { + "question_id": "529", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what number does the smaller arrow point to?", + "choices": null, + "answer": "1020", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "531": { + "question_id": "531", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to five.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "533": { + "question_id": "533", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small cyan cubes. Subtract all large yellow rubber cubes. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "535": { + "question_id": "535", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "-8", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "537": { + "question_id": "537", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of red rubber bicycles less than the number of cyan metal school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "539": { + "question_id": "539", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u3001E\u5206\u522b\u662f\u8fb9AB\u3001BC\u7684\u4e2d\u70b9\uff0c\u82e5\u25b3BDE\u7684\u5468\u957f\u662f6\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u662f\uff08\uff09\nChoices:\n(A) 8\n(B) 10\n(C) 12\n(D) 14", + "choices": [ + "8", + "10", + "12", + "14" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 71, + "img_width": 149, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "541": { + "question_id": "541", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is not identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "D", + "prediction": "D", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 560, + "img_width": 280, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "543": { + "question_id": "543", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small purple matte cars than brown matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "545": { + "question_id": "545", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Crimson?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 764, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "547": { + "question_id": "547", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which organisms will be most directly affected by a decrease in the amount of grass?\nChoices:\n(A) Insects\n(B) Hawk and snake\n(C) Snake and raccoon\n(D) Mouse and cricket", + "choices": [ + "Insects", + "Hawk and snake", + "Snake and raccoon", + "Mouse and cricket" + ], + "answer": "Insects", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Insects", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 377, + "img_width": 630, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "549": { + "question_id": "549", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangent to \u2299O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of \u25b3PCD is ()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) \u4e0d\u80fd\u786e\u5b9a", + "choices": [ + "8", + "12", + "16", + "\u4e0d\u80fd\u786e\u5b9a" + ], + "answer": "16", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 192, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "551": { + "question_id": "551", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest tattoos in male and the least in female?", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "553": { + "question_id": "553", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet less than Chocolate?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "555": { + "question_id": "555", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this nest larger than a fist?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "557": { + "question_id": "557", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220BAC\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5206\u522b\u5411\u5916\u4f5c\u7b49\u8fb9\u4e09\u89d2\u5f62\u25b3A'BC\uff0c\u25b3AB'C\uff0c\u25b3ABC'\uff0c\u82e5\u25b3A'BC\uff0c\u25b3AB'C\u7684\u9762\u79ef\u5206\u522b\u662f10\u548c4\uff0c\u5219\u25b3ABC'\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 9", + "choices": [ + "4", + "6", + "8", + "9" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 130, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "559": { + "question_id": "559", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest number shown on the black outer part of the watch?", + "choices": null, + "answer": "55", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "561": { + "question_id": "561", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber double buss right of the small red aeroplane the same as the number of small objects that are left of the tiny gray matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "563": { + "question_id": "563", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number on the monitor is higher?\nChoices:\n(A) top\n(B) bottom\n(C) left\n(D) right", + "choices": [ + "top", + "bottom", + "left", + "right" + ], + "answer": "bottom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "565": { + "question_id": "565", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model can achieve the best ImageNet 10-shot Accuracy score?\nChoices:\n(A) Soft MoE\n(B) Experts Choice\n(C) Tokens Choice\n(D) Dense", + "choices": [ + "Soft MoE", + "Experts Choice", + "Tokens Choice", + "Dense" + ], + "answer": "Soft MoE", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Soft MoE", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 978, + "img_width": 1966, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "567": { + "question_id": "567", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the slug to the nearest inch. The slug is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 252, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "569": { + "question_id": "569", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which subject had the highest pulse rate in baseline period?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2284, + "img_width": 1786, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "571": { + "question_id": "571", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 613, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "573": { + "question_id": "573", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A race car driver kept track of how many laps he drove in the past 5 days. What is the mode of the numbers?'", + "choices": null, + "answer": "53", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 203, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "575": { + "question_id": "575", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines $l$, $m$, and $n$ are perpendicular bisectors of $\\triangle PQR$ and meet at $T$. If $TQ = 2x$, $PT = 3y - 1$, and $TR = 8$, find $z$.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "577": { + "question_id": "577", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Consider the following matrices:\r\n$$\r\n\\mathbf{A}=\\left(\\begin{array}{rrr}\r\n1 & 2 & -1 \\\\\r\n0 & 3 & 1 \\\\\r\n2 & 0 & 1\r\n\\end{array}\\right), \\quad \\mathbf{B}=\\left(\\begin{array}{rrr}\r\n2 & 1 & 0 \\\\\r\n0 & -1 & 2 \\\\\r\n1 & 1 & 3\r\n\\end{array}\\right), \\quad \\mathbf{C}=\\left(\\begin{array}{ll}\r\n2 & 1 \\\\\r\n4 & 3 \\\\\r\n1 & 0\r\n\\end{array}\\right)\r\n$$\r\nFind $|\\mathbf{A B}|$.", + "choices": null, + "answer": "-104", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 142, + "img_width": 533, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "579": { + "question_id": "579", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of documents required per shipment to export goods in Uganda per year?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1228, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "581": { + "question_id": "581", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large matte cubes. Subtract all matte blocks. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "583": { + "question_id": "583", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth.\r\n\nChoices:\n(A) 5.8\n(B) 6.5\n(C) 14.2\n(D) 44.3", + "choices": [ + "5.8", + "6.5", + "14.2", + "44.3" + ], + "answer": "5.8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.8", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 465, + "img_width": 319, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "585": { + "question_id": "585", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u77e9\u5f62ABCD\u4e2d\uff0cAB\uff1d2\uff0c\u2220AOB\uff1d60\u00b0\uff0c\u5219BD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "4", + "3", + "2", + "2\u221a{3}" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "587": { + "question_id": "587", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()\nChoices:\n(A) 20\u6d77\u91cc\n(B) 20\u221a{2}\u6d77\u91cc\n(C) 15\u6d77\u91cc\n(D) 20\u6d77\u91cc", + "choices": [ + "20\u6d77\u91cc", + "20\u221a{2}\u6d77\u91cc", + "15\u6d77\u91cc", + "20\u6d77\u91cc" + ], + "answer": "20\u221a{2}\u6d77\u91cc", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u6d77\u91cc", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 124, + "img_width": 144, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "589": { + "question_id": "589", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number of things are either large objects behind the shiny double bus or tiny gray metal objects?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "591": { + "question_id": "591", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 600, + "img_width": 900, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "593": { + "question_id": "593", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average of longest light blue bar and shortest gray bar?", + "choices": null, + "answer": "273", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "595": { + "question_id": "595", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "597": { + "question_id": "597", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "599": { + "question_id": "599", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AC = 6 and BC = 3. Point P lies on line AB between A and B such that line CP is perpendicular to line AB. Which of the following could be the length of line CP?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 7\n(E) 8", + "choices": [ + "2", + "4", + "5", + "7", + "8" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 340, + "img_width": 393, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "601": { + "question_id": "601", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of smallest segment and second largest segment?", + "choices": null, + "answer": "0.33", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 386, + "img_width": 210, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "603": { + "question_id": "603", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP C\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "300", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "605": { + "question_id": "605", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large green matte cubes. Subtract all big green blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "607": { + "question_id": "607", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow shiny things. Subtract all yellow metal things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "609": { + "question_id": "609", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green matte cylinders. Subtract all big brown cubes. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "611": { + "question_id": "611", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A shipping company keeps track of the number of boxes in each shipment they send out. How many shipments had exactly 56 boxes? (Unit: shipments)", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 180, + "img_width": 153, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "613": { + "question_id": "613", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many houses are there?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 87, + "img_width": 473, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "615": { + "question_id": "615", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two sides of a triangle measure 12 and 7, which of the following cannot be the perimeter of the triangle?\nChoices:\n(A) 29\n(B) 34\n(C) 37\n(D) 38", + "choices": [ + "29", + "34", + "37", + "38" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "29", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 195, + "img_width": 522, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "617": { + "question_id": "617", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the normal components of $\\mathbf{a}$.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "619": { + "question_id": "619", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(4)?", + "choices": null, + "answer": "16", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 666, + "img_width": 970, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "621": { + "question_id": "621", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The figure above is composed of 25 small triangles that are congruent and equilateral. If the area of triangle DFH is 10, what is the area of triangle AFK?\nChoices:\n(A) 40\n(B) 42.5\n(C) 50\n(D) 52.5\n(E) 62.5", + "choices": [ + "40", + "42.5", + "50", + "52.5", + "62.5" + ], + "answer": "62.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 315, + "img_width": 397, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "623": { + "question_id": "623", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is twelve (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "625": { + "question_id": "625", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of blue matte school buss greater than the number of large cyan metallic jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "627": { + "question_id": "627", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends played a trivia game and recorded their scores. What is the mode of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 311, + "img_width": 155, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "629": { + "question_id": "629", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the object hut?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "631": { + "question_id": "631", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "633": { + "question_id": "633", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m\u22201 = 123$. Find the measure of $\\angle 14$.\nChoices:\n(A) 47\n(B) 57\n(C) 67\n(D) 123", + "choices": [ + "47", + "57", + "67", + "123" + ], + "answer": "57", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "47", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 330, + "img_width": 361, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "635": { + "question_id": "635", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, E is any point in \u25b1ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 179, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "637": { + "question_id": "637", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfa\u2225b\uff0c\u76f4\u7ebfa\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u5206\u522b\u4ea4\u4e8e\u70b9E\uff0cF\uff0c\u76f4\u7ebfb\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9CB\uff0cCD\u5206\u522b\u4ea4\u4e8e\u70b9G\uff0cH\uff0e\u82e5\u2220AFE\uff1d30\u00b0\uff0c\u5219\u2220DHG\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 100\u00b0\n(B) 110\u00b0\n(C) 120\u00b0\n(D) 130\u00b0", + "choices": [ + "100\u00b0", + "110\u00b0", + "120\u00b0", + "130\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "100\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 166, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "639": { + "question_id": "639", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What does the dial indicate as the top facing number?", + "choices": null, + "answer": "475", + "extraction": "450", + "prediction": "450", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VizWiz", + "split": "testmini", + "task": "visual question answering" + }, + "641": { + "question_id": "641", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: The graph of the concentration function $c(t)$ is shown after a 7-mg injection of dye into a heart. Use Simpson's Rule to estimate the cardiac output.", + "choices": null, + "answer": "5.77", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 420, + "img_width": 828, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "643": { + "question_id": "643", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is the diameter of \u2299O, chord DE \u2225 OA, if the degree of \u2220D is 50.0, then the degree of \u2220C is ()\nChoices:\n(A) 25\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 50\u00b0", + "choices": [ + "25\u00b0", + "30\u00b0", + "40\u00b0", + "50\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 111, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "645": { + "question_id": "645", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAC\uff0cBD\u662f\u83f1\u5f62ABCD\u7684\u5bf9\u89d2\u7ebf\uff0cBH\u22a5AD\u4e8e\u70b9H\uff0c\u82e5AC\uff1d4\uff0cBD\uff1d3\uff0c\u5219BH\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 2.4\n(B) 2.5\n(C) 4.8\n(D) 5", + "choices": [ + "2.4", + "2.5", + "4.8", + "5" + ], + "answer": "2.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 139, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "647": { + "question_id": "647", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the top view.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "B", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 900, + "img_width": 600, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "649": { + "question_id": "649", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many values are below 30 in Mainly are incidents of individual misconduct?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 461, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "651": { + "question_id": "651", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an assignment, Johnny looked at which countries got the most Nobel Prizes in various decades. In the 1990s, how many more Nobel Prize winners did Canada have than Italy? (Unit: Nobel Prize winners)", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 224, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "653": { + "question_id": "653", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there at least three distinct shades of blue in this photo?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "655": { + "question_id": "655", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Russia has the highest transport?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 507, + "img_width": 858, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "657": { + "question_id": "657", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Arkansas have a higher value than Indiana ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "659": { + "question_id": "659", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value of navy blue bar?", + "choices": null, + "answer": "991", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "661": { + "question_id": "661", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1274, + "img_width": 1732, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "663": { + "question_id": "663", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "665": { + "question_id": "665", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $h$ in the triangle.\nChoices:\n(A) 4.62\n(B) 5.66\n(C) 6.93\n(D) 8", + "choices": [ + "4.62", + "5.66", + "6.93", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.62", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 275, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "667": { + "question_id": "667", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has the least difference between the used and new cars?", + "choices": null, + "answer": "2015", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "669": { + "question_id": "669", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 3.5", + "choices": [ + "2", + "2.5", + "3", + "3.5" + ], + "answer": "3.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 18, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "671": { + "question_id": "671", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the semicircle rounded to 2 decimal places?", + "choices": null, + "answer": "14.14", + "extraction": "1.57", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "673": { + "question_id": "673", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large green cars less than the number of brown rubber double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "675": { + "question_id": "675", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()\nChoices:\n(A) 13m\n(B) 34m\n(C) (6+14\u221a{3})m\n(D) 40m", + "choices": [ + "13m", + "34m", + "(6+14\u221a{3})m", + "40m" + ], + "answer": "34m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "13m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 83, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "677": { + "question_id": "677", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of dirtbikes right of the large blue object less than the number of small green metallic cars in front of the tiny matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "679": { + "question_id": "679", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of \u25b3OAB is ()\nChoices:\n(A) 15\n(B) 17\n(C) 21\n(D) 27", + "choices": [ + "15", + "17", + "21", + "27" + ], + "answer": "17", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 73, + "img_width": 173, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "681": { + "question_id": "681", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the largest city in the nation where this plane is headquartered?\nChoices:\n(A) hong kong\n(B) osaka\n(C) shanghai\n(D) tokyo", + "choices": [ + "hong kong", + "osaka", + "shanghai", + "tokyo" + ], + "answer": "tokyo", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "hong kong", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "683": { + "question_id": "683", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 157, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "685": { + "question_id": "685", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to organism c if organism b increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't predict\n(D) stay same", + "choices": [ + "decrease", + "increase", + "can't predict", + "stay same" + ], + "answer": "increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 246, + "img_width": 574, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "687": { + "question_id": "687", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen that would increase the number of krill?\nChoices:\n(A) increase in phytoplankton\n(B) decrease in penguins\n(C) increase in fish\n(D) increase in birds", + "choices": [ + "increase in phytoplankton", + "decrease in penguins", + "increase in fish", + "increase in birds" + ], + "answer": "increase in phytoplankton", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase in phytoplankton", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 396, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "689": { + "question_id": "689", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are these people sitting in a circle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "691": { + "question_id": "691", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "256", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 500, + "img_width": 596, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "693": { + "question_id": "693", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the orange larger than the car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "695": { + "question_id": "695", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Orchid?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 734, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "697": { + "question_id": "697", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, \u2220B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()\nChoices:\n(A) 12\n(B) 18\n(C) 27\n(D) 54", + "choices": [ + "12", + "18", + "27", + "54" + ], + "answer": "27", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 205, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "699": { + "question_id": "699", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2684, + "img_width": 4577, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "701": { + "question_id": "701", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 109, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "703": { + "question_id": "703", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of highest value and lowest value of navy blue bar?", + "choices": null, + "answer": "2372.1", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "705": { + "question_id": "705", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the heart wider than more than half the width of the thorax?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 512, + "img_width": 419, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "707": { + "question_id": "707", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0ca\u2225b\uff0c\u22201\uff1d60\u00b0\uff0c\u5219\u22202\u7684\u5927\u5c0f\u662f\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 120\u00b0", + "choices": [ + "60\u00b0", + "80\u00b0", + "100\u00b0", + "120\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "709": { + "question_id": "709", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "711": { + "question_id": "711", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 270, + "img_width": 369, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "713": { + "question_id": "713", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 7", + "choices": [ + "3", + "4", + "6", + "7" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 422, + "img_width": 521, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "715": { + "question_id": "715", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "717": { + "question_id": "717", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is \\int_1^{\\infty} {1\\over x^{0.99}} dx finite according to this graph ?\n\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 314, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "719": { + "question_id": "719", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Brenda graphed the daily low temperature for 5 days. What is the range of the numbers?'", + "choices": null, + "answer": "13", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "721": { + "question_id": "721", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many odd functions are in the graph?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 297, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "723": { + "question_id": "723", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 277, + "img_width": 468, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "725": { + "question_id": "725", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In Figure, suppose that Barbara's velocity relative to Alex is a constant $v_{B A}=52 \\mathrm{~km} / \\mathrm{h}$ and car $P$ is moving in the negative direction of the $x$ axis.\r\n(a) If Alex measures a constant $v_{P A}=-78 \\mathrm{~km} / \\mathrm{h}$ for car $P$, what velocity $v_{P B}$ will Barbara measure?", + "choices": null, + "answer": "-130", + "extraction": "-26", + "prediction": "-26", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 690, + "img_width": 976, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "727": { + "question_id": "727", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the largest and the smallest value in the chart?", + "choices": null, + "answer": "70", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "729": { + "question_id": "729", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "731": { + "question_id": "731", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The train conductor made sure to count the number of passengers on each train. What is the smallest number of passengers? (Unit: passengers)", + "choices": null, + "answer": "40", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 159, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "733": { + "question_id": "733", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Square ABCD. CT: tangent to semicircle. Find the angle \u2220CTD. Return the numeric value.", + "choices": null, + "answer": "63.4", + "extraction": "135.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1018, + "img_width": 972, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "735": { + "question_id": "735", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan things in front of the cyan rubber suv less than the number of big suvs that are behind the red bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "737": { + "question_id": "737", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 32\n(B) 39\n(C) 46\n(D) 78", + "choices": [ + "32", + "39", + "46", + "78" + ], + "answer": "78", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "739": { + "question_id": "739", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hannah need to buy a baking dish and a cookie jar? (Unit: $)", + "choices": null, + "answer": "23", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 201, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "741": { + "question_id": "741", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1080, + "img_width": 1920, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "743": { + "question_id": "743", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the different between the highest unemployment rate and the lowest?", + "choices": null, + "answer": "10.53", + "extraction": "1.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "745": { + "question_id": "745", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2832, + "img_width": 4256, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "747": { + "question_id": "747", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot M$, $FL=24,HJ=48$, and $m \\widehat {HP}=65$. Find $m \\widehat {HJ}$.\nChoices:\n(A) 65\n(B) 120\n(C) 130\n(D) 155", + "choices": [ + "65", + "120", + "130", + "155" + ], + "answer": "130", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 467, + "img_width": 507, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "749": { + "question_id": "749", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()\nChoices:\n(A) \\frac{15}{4}cm\n(B) \\frac{20}{3}cm\n(C) \\frac{15}{7}cm\n(D) \\frac{20}{7}cm", + "choices": [ + "\\frac{15}{4}cm", + "\\frac{20}{3}cm", + "\\frac{15}{7}cm", + "\\frac{20}{7}cm" + ], + "answer": "\\frac{20}{7}cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{15}{4}cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 181, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "751": { + "question_id": "751", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely happen if Artemia was removed?\nChoices:\n(A) Seahorses would decrease\n(B) Rotifers would decrease\n(C) Mysids would decrease\n(D) Algae would decrease", + "choices": [ + "Seahorses would decrease", + "Rotifers would decrease", + "Mysids would decrease", + "Algae would decrease" + ], + "answer": "Seahorses would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Seahorses would decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 363, + "img_width": 862, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "753": { + "question_id": "753", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "755": { + "question_id": "755", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a polynomial", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "757": { + "question_id": "757", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 7.2\n(B) 8\n(C) 12\n(D) 15", + "choices": [ + "7.2", + "8", + "12", + "15" + ], + "answer": "7.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 220, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "759": { + "question_id": "759", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 201, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "761": { + "question_id": "761", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the crayfish population if the Largemouth Bass and Northern Pike populations decrease?\nChoices:\n(A) Nothing\n(B) Decrease\n(C) Slightly Decrease\n(D) Increase", + "choices": [ + "Nothing", + "Decrease", + "Slightly Decrease", + "Increase" + ], + "answer": "Increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Nothing", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 319, + "img_width": 405, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "763": { + "question_id": "763", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny shiny balls. Subtract all purple objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "765": { + "question_id": "765", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "767": { + "question_id": "767", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of y?", + "choices": null, + "answer": "5", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 429, + "img_width": 483, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "769": { + "question_id": "769", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each blue ball represents one particle of solute. Which solution has a higher concentration of blue particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B", + "choices": [ + "neither; their concentrations are the same", + "Solution A", + "Solution B" + ], + "answer": "Solution A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "771": { + "question_id": "771", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram of a food chain below and on your knowledge of science. If the population of snakes increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) None", + "choices": [ + "decrease", + "remain the same", + "increase", + "None" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "773": { + "question_id": "773", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point D is on the extended line of AB, passing point D is the tangent of \u2299O, and the tangent point is C, if \u2220A = 25.0, then \u2220D = ()\nChoices:\n(A) 25\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 65\u00b0", + "choices": [ + "25\u00b0", + "40\u00b0", + "50\u00b0", + "65\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 163, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "775": { + "question_id": "775", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 724, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "777": { + "question_id": "777", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In rhombus LMPQ, $m \\angle Q L M=2 x^{2}-10$, $m \\angle Q P M=8 x$, and $M P=10$ . \r\nFind the perimeter of $LMPQ$\nChoices:\n(A) 10\n(B) 40\n(C) 70\n(D) 140", + "choices": [ + "10", + "40", + "70", + "140" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 177, + "img_width": 337, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "779": { + "question_id": "779", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the cardiac silhouette less than half the diameter of the diaphragm?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 841, + "img_width": 1023, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "781": { + "question_id": "781", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\triangle CDF$, $K$ is the centroid and $DK=16$. Find $CD$.\nChoices:\n(A) 9\n(B) 12\n(C) 18\n(D) 18", + "choices": [ + "9", + "12", + "18", + "18" + ], + "answer": "18", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 540, + "img_width": 461, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "783": { + "question_id": "783", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In order to measure the width of parallel river AB, \u2220ACB = 30.0, \u2220ADB = 60.0, CD = 60.0, then the width of the river AB is ()\nChoices:\n(A) 30m\n(B) 30\u221a{3}m\n(C) (30\u221a{3}+30)m\n(D) (30\u221a{3}-30)m", + "choices": [ + "30m", + "30\u221a{3}m", + "(30\u221a{3}+30)m", + "(30\u221a{3}-30)m" + ], + "answer": "30\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "785": { + "question_id": "785", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Part of an ecosystem is shown in this diagram. Imagine the algae and floating plants are prevented from growing. How will that most likely affect this ecosystem?\nChoices:\n(A) The number of ducks will increase\n(B) The number of minnows will increase\n(C) There will be no effect on this ecosystem\n(D) The number of aquatic crustaceans will decrease", + "choices": [ + "The number of ducks will increase", + "The number of minnows will increase", + "There will be no effect on this ecosystem", + "The number of aquatic crustaceans will decrease" + ], + "answer": "The number of aquatic crustaceans will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The number of ducks will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "787": { + "question_id": "787", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of the zebra's stripes are horizontal?", + "choices": null, + "answer": "50", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "789": { + "question_id": "789", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the values of posse and mortar?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "791": { + "question_id": "791", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Given $V_s$ = 5V, $R_1$ = 1k\u03a9, $R_2$ = 2.2k\u03a9, $R_3$ = 2.2k\u03a9, $R_4$ = 1.5k\u03a9, and $R_L$ = 4.7k\u03a9. Determine the voltage and current across $R_L$. Answer in unit of V (3 sig.fig.).", + "choices": null, + "answer": "1.06", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 400, + "img_width": 444, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "793": { + "question_id": "793", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest Elo score for the agent using an offline RL algorithm?", + "choices": null, + "answer": "1578", + "extraction": "177", + "prediction": "177", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1922, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "795": { + "question_id": "795", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "75", + "extraction": "30", + "prediction": "30", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 601, + "img_width": 475, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "797": { + "question_id": "797", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing pattern in the picture?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 291, + "img_width": 386, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "799": { + "question_id": "799", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Ruth need to buy a baking dish, a casserole dish, and an ice cream scoop? (Unit: $)", + "choices": null, + "answer": "13", + "extraction": "13", + "prediction": "13", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 229, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "801": { + "question_id": "801", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?'", + "choices": null, + "answer": "10", + "extraction": "9", + "prediction": "9", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 272, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "803": { + "question_id": "803", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "805": { + "question_id": "805", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the donut more than half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 434, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "807": { + "question_id": "807", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaf shapes would have the least amount of wind resistance and water loss?\nChoices:\n(A) Truncate\n(B) Acuminate\n(C) Rounded\n(D) Sagittate", + "choices": [ + "Truncate", + "Acuminate", + "Rounded", + "Sagittate" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Truncate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 508, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "809": { + "question_id": "809", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a group of horses, some individuals have a black coat and others have a reddish-brown coat. In this group, the gene for the coat color trait has two alleles. The allele for a black coat (L) is dominant over the allele for a reddish-brown coat (l).\nThis Punnett square shows a cross between two horses. What is the expected ratio of offspring with a reddish-brown coat to offspring with a black coat? Choose the most likely ratio.\nChoices:\n(A) 1:3\n(B) 4:0\n(C) 3:1\n(D) 0:4\n(E) 2:2", + "choices": [ + "1:3", + "4:0", + "3:1", + "0:4", + "2:2" + ], + "answer": "2:2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1:3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 241, + "img_width": 233, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "811": { + "question_id": "811", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A machine at the candy factory dispensed different numbers of lemon-flavored candies into various bags. What is the smallest number of lemon-flavored candies? (Unit: lemon-flavored candies)", + "choices": null, + "answer": "34", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "813": { + "question_id": "813", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value on the X axis?", + "choices": null, + "answer": "30", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2264, + "img_width": 1768, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "815": { + "question_id": "815", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle N C L$\nChoices:\n(A) 60\n(B) 120\n(C) 240\n(D) 360", + "choices": [ + "60", + "120", + "240", + "360" + ], + "answer": "120", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 279, + "img_width": 367, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "817": { + "question_id": "817", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight line a \u2225 b, the point B is on the straight line b, and AB \u22a5 BC, \u22202 = 65.0, then the degree of \u22201 is ()\nChoices:\n(A) 65\u00b0\n(B) 25\u00b0\n(C) 35\u00b0\n(D) 45\u00b0", + "choices": [ + "65\u00b0", + "25\u00b0", + "35\u00b0", + "45\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 171, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "819": { + "question_id": "819", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of $t$ in the parallelogram.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 13", + "choices": [ + "6", + "7", + "8", + "13" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 400, + "img_width": 428, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "821": { + "question_id": "821", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most of the people young men?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 360, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "823": { + "question_id": "823", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You can see how organisms are interconnected from the diagram given. What will be the effect if all the Killer whales are removed?\nChoices:\n(A) The population of tuna will increase\n(B) Mouse will decrease in number\n(C) The phytoplankton will decrease\n(D) The grasshopper will die", + "choices": [ + "The population of tuna will increase", + "Mouse will decrease in number", + "The phytoplankton will decrease", + "The grasshopper will die" + ], + "answer": "The population of tuna will increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of tuna will increase", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "825": { + "question_id": "825", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metallic road bikes that are behind the large bus less than the number of small matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "827": { + "question_id": "827", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1138, + "img_width": 828, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "829": { + "question_id": "829", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which matchstick needs to be moved in order to create a square?\nChoices:\n(A) Top\n(B) Bottom\n(C) Left\n(D) Right\n(E) Not possible", + "choices": [ + "Top", + "Bottom", + "Left", + "Right", + "Not possible" + ], + "answer": "Left", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 396, + "img_width": 378, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "831": { + "question_id": "831", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An author recorded how many words she wrote in the past 3 days. How many words in total did the author write on Thursday and Friday? (Unit: words)", + "choices": null, + "answer": "679", + "extraction": "635", + "prediction": "635", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 236, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "833": { + "question_id": "833", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Phenylalanine (Phe, 5) is a naturally occurring amino acid. What is the energy of interaction between its phenyl group and the electric dipole moment of a neighbouring peptide group? Take the distance between the groups as $4.0 \\mathrm{~nm}$ and treat the phenyl group as a benzene molecule. The magnitude of the dipole moment of the peptide group is $\\mu=1.3 \\mathrm{D}$ and the polarizability volume of benzene is $\\alpha^{\\prime}=1.04 \\times 10^{-29} \\mathrm{~m}^3$.", + "choices": null, + "answer": "-4.3", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 372, + "img_width": 474, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "835": { + "question_id": "835", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of people are wearing blue?", + "choices": null, + "answer": "0", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "837": { + "question_id": "837", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red motorbikes than big red choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "839": { + "question_id": "839", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many years have value less than 10%?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "841": { + "question_id": "841", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends compared the sizes of their stuffed animal collections. What is the median of the numbers?'", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 265, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "843": { + "question_id": "843", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Red?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 752, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "845": { + "question_id": "845", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 390, + "img_width": 550, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "847": { + "question_id": "847", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which function grows the fastest as x increases?\nChoices:\n(A) red\n(B) purple\n(C) blue", + "choices": [ + "red", + "purple", + "blue" + ], + "answer": "red", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "red", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1294, + "img_width": 1706, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "849": { + "question_id": "849", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The 4 8x8 images shown below are encoded with JPEG coding. Based on their expected DCT (Discrete Cosine Transform) coefficients, Which image has the most non-zero AC coefficients? (a): Image A, (b): Image B, (c): Image C, (d): Image D.\nChoices:\n(A) (c)\n(B) (d)\n(C) (a)\n(D) (b)\n(E) (e)", + "choices": [ + "(c)", + "(d)", + "(a)", + "(b)", + "(e)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 282, + "img_width": 940, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "851": { + "question_id": "851", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the net concessional disbursements from imf greater than 32000000 US$?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1139, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "853": { + "question_id": "853", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, \u2220BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12", + "choices": [ + "3", + "6", + "9", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "855": { + "question_id": "855", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$ so that $a \u2225 b$.\nChoices:\n(A) 2.5\n(B) 14\n(C) 15\n(D) 16", + "choices": [ + "2.5", + "14", + "15", + "16" + ], + "answer": "14", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 536, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "857": { + "question_id": "857", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "859": { + "question_id": "859", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "27", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 603, + "img_width": 750, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "861": { + "question_id": "861", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 680, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "863": { + "question_id": "863", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rhode Island have the lowest value in the USA ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "865": { + "question_id": "865", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Hot Pink have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 512, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "867": { + "question_id": "867", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A food industry researcher compiled the revenues of several pizzerias. How much did Dan's Deep Dish make from pizza sales? (Unit: $)", + "choices": null, + "answer": "22", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 465, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "869": { + "question_id": "869", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow matte cubes. Subtract all metal things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "871": { + "question_id": "871", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 200, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "873": { + "question_id": "873", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 40?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "875": { + "question_id": "875", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow things. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "877": { + "question_id": "877", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms squad and warm?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "879": { + "question_id": "879", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray rubber things. Subtract all small blue spheres. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "881": { + "question_id": "881", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the population of grasshopper decreases, the population of mouse will most likely do what?\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remain the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "883": { + "question_id": "883", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "15", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 207, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "885": { + "question_id": "885", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Grayson counted the number of pieces of pepperoni on each pizza he made. What is the smallest number of pieces of pepperoni? (Unit: pieces of pepperoni)", + "choices": null, + "answer": "18", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "887": { + "question_id": "887", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O. If \u2220ABC = 70.0, then the degree of \u2220AOC is equal to ()\nChoices:\n(A) 140\u00b0\n(B) 130\u00b0\n(C) 120\u00b0\n(D) 110\u00b0", + "choices": [ + "140\u00b0", + "130\u00b0", + "120\u00b0", + "110\u00b0" + ], + "answer": "140\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "140\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 106, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "889": { + "question_id": "889", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 472, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "891": { + "question_id": "891", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "893": { + "question_id": "893", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the blue function as x approaches negative infinity?", + "choices": null, + "answer": "0", + "extraction": "-4", + "prediction": "-4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "895": { + "question_id": "895", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the lowest Audio-Audio Similarity and Text-Audio Similarity scores overall?\nChoices:\n(A) MusicLDM (mix-up)\n(B) MusicLDM (original)\n(C) MusicLDM (BLM)\n(D) MusicLDM (BAM)\n(E) MuBERT", + "choices": [ + "MusicLDM (mix-up)", + "MusicLDM (original)", + "MusicLDM (BLM)", + "MusicLDM (BAM)", + "MuBERT" + ], + "answer": "MuBERT", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "MusicLDM (mix-up)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "violin plot", + "grade": "college", + "img_height": 682, + "img_width": 1882, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "897": { + "question_id": "897", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a calculator to find the measure of $\u2220J$ to the nearest degree.\nChoices:\n(A) 33\n(B) 40\n(C) 50\n(D) 57", + "choices": [ + "33", + "40", + "50", + "57" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 223, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "899": { + "question_id": "899", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number comes next?", + "choices": null, + "answer": "2123", + "extraction": "1357", + "prediction": "1357", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 185, + "img_width": 406, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "901": { + "question_id": "901", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all shiny spheres. Subtract all big red matte spheres. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "903": { + "question_id": "903", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if \u2220ABC = 30.0, then the degree of \u2220AOC is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "905": { + "question_id": "905", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large red cars behind the metal car less than the number of blue matte tandem bikes that are behind the big blue rubber utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "907": { + "question_id": "907", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When the military expenditure value was lower than 0.2%?", + "choices": null, + "answer": "1970", + "extraction": "1970", + "prediction": "1970", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "909": { + "question_id": "909", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AD = 1.0, DB = 2.0, then the value of \\frac ADAB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{1}{4}\n(C) \\frac{1}{3}\n(D) \\frac{1}{2}", + "choices": [ + "\\frac{2}{3}", + "\\frac{1}{4}", + "\\frac{1}{3}", + "\\frac{1}{2}" + ], + "answer": "\\frac{1}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 132, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "911": { + "question_id": "911", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the smaller picture below the larger picture?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "913": { + "question_id": "913", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 763, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "915": { + "question_id": "915", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the Lion population if the Gum Tree population decreased?\nChoices:\n(A) Unable to determine.\n(B) Nothing would happen.\n(C) It would also decrease.\n(D) It would increase.", + "choices": [ + "Unable to determine.", + "Nothing would happen.", + "It would also decrease.", + "It would increase." + ], + "answer": "It would also decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Unable to determine.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 740, + "img_width": 528, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "917": { + "question_id": "917", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of the number of procedures to register a business in 2004 to that in 2007?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 939, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "919": { + "question_id": "919", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold more than 3 units in at least one store?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "921": { + "question_id": "921", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 5\n(B) 8.1\n(C) 10.3\n(D) 21.6", + "choices": [ + "5", + "8.1", + "10.3", + "21.6" + ], + "answer": "21.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 170, + "img_width": 226, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "923": { + "question_id": "923", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model achieves the highest score in terms of Rec?\nChoices:\n(A) Transformers Agent (GPT-4)\n(B) LLaMA-Adapter v2-7B\n(C) LLaVA-7B\n(D) Otter-9B \n(E) MM-ReAct-GPT-3.5\n(F) LLaVA-13B (LLaMA-2)\n(G) MM-ReAct-GPT-4", + "choices": [ + "Transformers Agent (GPT-4)", + "LLaMA-Adapter v2-7B", + "LLaVA-7B", + "Otter-9B ", + "MM-ReAct-GPT-3.5", + "LLaVA-13B (LLaMA-2)", + "MM-ReAct-GPT-4" + ], + "answer": "LLaVA-13B (LLaMA-2)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Transformers Agent (GPT-4)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1910, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "925": { + "question_id": "925", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Haley went to the store. She bought 3+9/10 pounds of pumpernickel bread crumbs. How much did she spend? (Unit: $)", + "choices": null, + "answer": "19.5", + "extraction": "15.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 130, + "img_width": 334, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "927": { + "question_id": "927", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAB\u7684\u5782\u76f4\u5e73\u5206\u7ebf\u4ea4AB\u4e8e\u70b9D\uff0c\u4ea4BC\u4e8e\u70b9E\uff0c\u8fde\u63a5AE\uff0e\u82e5AB\uff1d6\uff0c\u25b3ACE\u7684\u5468\u957f\u4e3a13\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 19\n(B) 16\n(C) 29\n(D) 18", + "choices": [ + "19", + "16", + "29", + "18" + ], + "answer": "19", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "19", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 152, + "img_width": 199, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "929": { + "question_id": "929", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Tim need to buy a mystery game and a toy rocket? (Unit: $)", + "choices": null, + "answer": "85", + "extraction": "32", + "prediction": "32", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 226, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "931": { + "question_id": "931", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O, AB is the diameter of \u2299O, point D is a point on \u2299O, if \u2220ACD = 40.0, then the size of \u2220BAD is ()\nChoices:\n(A) 35\u00b0\n(B) 50\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "35\u00b0", + "50\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "933": { + "question_id": "933", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hector need to buy a European vacation package and an Australian vacation package? (Unit: $)", + "choices": null, + "answer": "9606", + "extraction": "1796", + "prediction": "1796", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 344, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "935": { + "question_id": "935", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAD\uff1d6\uff0cAB\uff1d4\uff0cDE\u5e73\u5206\u2220ADC\u4ea4BC\u4e8e\u70b9E\uff0c\u5219BE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 140, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "937": { + "question_id": "937", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "939": { + "question_id": "939", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be most affected if the clams all died?\nChoices:\n(A) squid\n(B) lantern fish\n(C) octopus\n(D) sea horse", + "choices": [ + "squid", + "lantern fish", + "octopus", + "sea horse" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "squid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "941": { + "question_id": "941", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which is the next number in the series?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 327, + "img_width": 271, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "943": { + "question_id": "943", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1258, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "945": { + "question_id": "945", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 451, + "img_width": 610, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "947": { + "question_id": "947", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBC\u2225DE\uff0c\u2220A\uff1d45\u00b0\uff0c\u2220C\uff1d110\u00b0\uff0c\u5219\u2220AED\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 95\u00b0\n(B) 105\u00b0\n(C) 115\u00b0\n(D) 125\u00b0", + "choices": [ + "95\u00b0", + "105\u00b0", + "115\u00b0", + "125\u00b0" + ], + "answer": "115\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "95\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "949": { + "question_id": "949", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the combined percentage of Lowest ROI and Medium ROI in SEO?", + "choices": null, + "answer": "56", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "951": { + "question_id": "951", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 10.25\n(B) 12.75\n(C) 18.75\n(D) 25.5", + "choices": [ + "10.25", + "12.75", + "18.75", + "25.5" + ], + "answer": "12.75", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10.25", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 427, + "img_width": 487, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "953": { + "question_id": "953", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of trees have leaves?", + "choices": null, + "answer": "50", + "extraction": "50", + "prediction": "50", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "955": { + "question_id": "955", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0e\u70b9O\u662f\u6b63\u4e94\u8fb9\u5f62ABCDE\u7684\u4e2d\u5fc3\uff0c\u2299O\u662f\u6b63\u4e94\u8fb9\u5f62\u7684\u5916\u63a5\u5706\uff0c\u2220ADE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 32\u00b0\n(C) 36\u00b0\n(D) 40\u00b0", + "choices": [ + "30\u00b0", + "32\u00b0", + "36\u00b0", + "40\u00b0" + ], + "answer": "36\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 136, + "img_width": 136, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "957": { + "question_id": "957", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big brown buss behind the gray matte aeroplane greater than the number of yellow shiny scooters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "959": { + "question_id": "959", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The teachers at an elementary school counted how many desks they had in their classrooms. What is the median of the numbers?'", + "choices": null, + "answer": "32", + "extraction": "32", + "prediction": "32", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "961": { + "question_id": "961", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value in blue bar?", + "choices": null, + "answer": "7", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "963": { + "question_id": "963", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For what x does f reach its local maximum?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 397, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "965": { + "question_id": "965", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: whats the lowest number yard line that you can see?", + "choices": null, + "answer": "30", + "extraction": "30", + "prediction": "30", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 690, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "967": { + "question_id": "967", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the amount earned from national visitors greater than the average amount earned from national visitors taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1146, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "969": { + "question_id": "969", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "971": { + "question_id": "971", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the boy reach the highest book?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "973": { + "question_id": "973", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many zeros does this function have?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "975": { + "question_id": "975", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte objects. Subtract all blue metallic objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "977": { + "question_id": "977", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5AB\u2225CD\uff0cAF\u4e0eCD\u4ea4\u4e8e\u70b9E\uff0cBE\u22a5AF\uff0c\u2220B\uff1d65\u00b0\uff0c\u5219\u2220DEF\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 5\u00b0\n(C) 15\u00b0\n(D) 25\u00b0", + "choices": [ + "65\u00b0", + "5\u00b0", + "15\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "979": { + "question_id": "979", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9079", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "981": { + "question_id": "981", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of 2002, 2003 and 2004?", + "choices": null, + "answer": "70.4", + "extraction": "6006.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "983": { + "question_id": "983", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest accuracy reported in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "985": { + "question_id": "985", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the smallest percentage value recorded in the chart?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "987": { + "question_id": "987", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A cross-section of an airplane wing is shown. Measurements of the thickness of the wing, in centimeters, at 20-centimeter intervals are 5.8, 20.3, 26.7, 29.0, 27.6, 27.3, 23.8, 20.5, 15.1, 8.7, and 2.8. Use the Midpoint Rule to estimate the area of the wing's cross-section.", + "choices": null, + "answer": "4232", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 338, + "img_width": 946, + "language": "english", + "skills": [ + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "989": { + "question_id": "989", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "991": { + "question_id": "991", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which functions have no zeros?\nChoices:\n(A) green and red\n(B) blue and orange\n(C) purple and grey", + "choices": [ + "green and red", + "blue and orange", + "purple and grey" + ], + "answer": "purple and grey", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green and red", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 383, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "993": { + "question_id": "993", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny yellow matte cylinders. Subtract all small blue rubber spheres. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "995": { + "question_id": "995", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the sum of the two bars in 18 to 21?", + "choices": null, + "answer": "18.27", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "997": { + "question_id": "997", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is kx^2/2 larger than E at x=0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 197, + "img_width": 537, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "999": { + "question_id": "999", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the tangential components of $\\mathbf{a}$.", + "choices": null, + "answer": "4.5", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "2": { + "question_id": "2", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the total volume of the measuring cup? (Unit: g)", + "choices": null, + "answer": "1000", + "extraction": "1000", + "prediction": "1000", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "4": { + "question_id": "4", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large rubber spheres. Subtract all big shiny cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "6": { + "question_id": "6", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()\nChoices:\n(A) 6cm\n(B) 7cm\n(C) 4cm\n(D) 5cm", + "choices": [ + "6cm", + "7cm", + "4cm", + "5cm" + ], + "answer": "6cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 30, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "8": { + "question_id": "8", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny gray bicycles that are on the left side of the brown metal sedan greater than the number of things that are to the left of the tiny green bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "10": { + "question_id": "10", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "E", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 418, + "img_width": 376, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "12": { + "question_id": "12", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer metallic fighters than rubber objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "14": { + "question_id": "14", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "16": { + "question_id": "16", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 5 units in at least one store?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "18": { + "question_id": "18", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The passage below describes an experiment. Read the passage and then follow the instructions below.\n\nLinda applied a thin layer of wax to the underside of her snowboard and rode the board straight down a hill. Then, she removed the wax and rode the snowboard straight down the hill again. She repeated the rides four more times, alternating whether she rode with a thin layer of wax on the board or not. Her friend Bob timed each ride. Linda and Bob calculated the average time it took to slide straight down the hill on the snowboard with wax compared to the average time on the snowboard without wax.\nFigure: snowboarding down a hill. Identify the question that Linda and Bob's experiment can best answer.\nChoices:\n(A) Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?\n(B) Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "choices": [ + "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?" + ], + "answer": "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "elementary school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "20": { + "question_id": "20", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "22": { + "question_id": "22", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 785, + "img_width": 555, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "24": { + "question_id": "24", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 709, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "26": { + "question_id": "26", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black greater than Deep Sky Blue?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 761, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "28": { + "question_id": "28", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{AB}$ is a diameter, $AC=8$ inches, and $BC=15$ inches. Find the radius of the circle.\nChoices:\n(A) 7.5\n(B) 8\n(C) 8.5\n(D) 17", + "choices": [ + "7.5", + "8", + "8.5", + "17" + ], + "answer": "8.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 431, + "img_width": 519, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "30": { + "question_id": "30", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the two chords AB and CD in the circle intersect at E, \u2220D = 35.0, \u2220AEC = 105.0, then \u2220C = ()\nChoices:\n(A) 60\u00b0\n(B) 70\u00b0\n(C) 80\u00b0\n(D) 85\u00b0", + "choices": [ + "60\u00b0", + "70\u00b0", + "80\u00b0", + "85\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "32": { + "question_id": "32", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1dAC\uff0c\u2220CAB\uff1d40\u00b0\uff0c\u5219\u2220D\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 168, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "34": { + "question_id": "34", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous at each point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "36": { + "question_id": "36", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "38": { + "question_id": "38", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 6?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "40": { + "question_id": "40", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown blocks. Subtract all large blue rubber things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "42": { + "question_id": "42", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 539, + "img_width": 401, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "44": { + "question_id": "44", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend? (Unit: $)", + "choices": null, + "answer": "18", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 226, + "img_width": 305, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "46": { + "question_id": "46", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the population of adult spiders if predator ate all the spider eggs?\nChoices:\n(A) Adult spider population would remain the same\n(B) Adult spider population would double.\n(C) Adults spider population would decrease\n(D) Adult spider population would increase.", + "choices": [ + "Adult spider population would remain the same", + "Adult spider population would double.", + "Adults spider population would decrease", + "Adult spider population would increase." + ], + "answer": "Adults spider population would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Adult spider population would remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 829, + "img_width": 1024, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "48": { + "question_id": "48", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle 3$.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62", + "choices": [ + "28", + "38", + "52", + "62" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 426, + "img_width": 596, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "50": { + "question_id": "50", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what would likely happen if the number of large roach would decrease?\nChoices:\n(A) The population of steelheads would decrease.\n(B) The population of stickleback fry would increase.\n(C) The population of predatory insects would increase.\n(D) The population of predatory insects would decrease.", + "choices": [ + "The population of steelheads would decrease.", + "The population of stickleback fry would increase.", + "The population of predatory insects would increase.", + "The population of predatory insects would decrease." + ], + "answer": "The population of predatory insects would decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of steelheads would decrease.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 600, + "img_width": 633, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "52": { + "question_id": "52", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red metallic spheres. Subtract all big brown matte things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "54": { + "question_id": "54", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, the ratio of the length of line AB to the length of line AC is 2 : 5. If AC = 25, what is the length of line AB?\nChoices:\n(A) 8\n(B) 10\n(C) 15\n(D) 18\n(E) 20", + "choices": [ + "8", + "10", + "15", + "18", + "20" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "56": { + "question_id": "56", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 295, + "img_width": 202, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "58": { + "question_id": "58", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 760, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "60": { + "question_id": "60", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 381, + "img_width": 477, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "62": { + "question_id": "62", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cE\uff0cF\u5206\u522b\u662f\u83f1\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u7684\u4e2d\u70b9\uff0c\u4e14AB\uff1d5\uff0cAC\uff1d6\uff0e\u5219EF\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 6", + "choices": [ + "4", + "5", + "5.5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 138, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "64": { + "question_id": "64", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample A\n(C) sample B", + "choices": [ + "neither; the samples have the same temperature", + "sample A", + "sample B" + ], + "answer": "sample A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 405, + "img_width": 550, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "66": { + "question_id": "66", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "A", + "extraction": "A", + "prediction": "A", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 562, + "img_width": 320, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "68": { + "question_id": "68", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5c06\u4e00\u6839\u957f\u5ea6\u4e3a16cm\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53476cm\u81f3D\u70b9\uff08\u5982\u56fe\uff09\uff0c\u5219\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 8cm", + "choices": [ + "2cm", + "4cm", + "6cm", + "8cm" + ], + "answer": "4cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 252, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "70": { + "question_id": "70", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2600, + "img_width": 2266, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "72": { + "question_id": "72", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A real estate agent drove around the neighborhood and counted the number of houses on each block. How many blocks have exactly 36 houses? (Unit: blocks)", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "74": { + "question_id": "74", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the difference of largest and smallest bar?", + "choices": null, + "answer": "47.6", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "76": { + "question_id": "76", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to fish if pelicans increase?\nChoices:\n(A) decrease\n(B) nothing\n(C) increase\n(D) none of the above", + "choices": [ + "decrease", + "nothing", + "increase", + "none of the above" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "78": { + "question_id": "78", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the missing value.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 394, + "img_width": 1062, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "80": { + "question_id": "80", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what will happen if all the algae died due to pesticides?\nChoices:\n(A) Crabs and limpets will decrease\n(B) Dolphins will increase\n(C) Sea gulls will become extinct\n(D) Star fish will increase", + "choices": [ + "Crabs and limpets will decrease", + "Dolphins will increase", + "Sea gulls will become extinct", + "Star fish will increase" + ], + "answer": "Crabs and limpets will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Crabs and limpets will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 199, + "img_width": 372, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "82": { + "question_id": "82", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A square is inscribed in a circle of area 18$\\pi$ square units. Find the length of a side of the square.\nChoices:\n(A) 3\n(B) 3 \\sqrt 2\n(C) 6\n(D) 6 \\sqrt 2", + "choices": [ + "3", + "3 \\sqrt 2", + "6", + "6 \\sqrt 2" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 202, + "img_width": 200, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "84": { + "question_id": "84", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: ABCD is a square. Inscribed Circle center is O. Find the the angle of \u2220AMK. Return the numeric value.", + "choices": null, + "answer": "130.9", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1220, + "img_width": 1194, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "86": { + "question_id": "86", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the highest Acc score when Pretrain Loss is equal to 1.80?\nChoices:\n(A) ICL\n(B) SFT\n(C) SFT 1/8\n(D) RFT k=100\n(E) RFT k=25\n(F) RET k=6\n(G) RFT U13B", + "choices": [ + "ICL", + "SFT", + "SFT 1/8", + "RFT k=100", + "RFT k=25", + "RET k=6", + "RFT U13B" + ], + "answer": "RFT U13B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ICL", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 1046, + "img_width": 1734, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "88": { + "question_id": "88", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A square is tangent to a line at point P in the figure above. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 277, + "img_width": 442, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "90": { + "question_id": "90", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow matte blocks. Subtract all tiny brown cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "92": { + "question_id": "92", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the windows have a geometric shape that most houses have?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "94": { + "question_id": "94", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cD\u4e3a\u25b3ABC\u5185\u4e00\u70b9\uff0cCD\u5e73\u5206\u2220ACB\uff0cBD\u22a5CD\uff0c\u2220A\uff1d\u2220ABD\uff0c\u82e5\u2220DBC\uff1d54\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 44\u00b0\n(C) 27\u00b0\n(D) 54\u00b0", + "choices": [ + "36\u00b0", + "44\u00b0", + "27\u00b0", + "54\u00b0" + ], + "answer": "27\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 74, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "96": { + "question_id": "96", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: How many times Dissatisfied more than satisfied?", + "choices": null, + "answer": "3.9", + "extraction": "3.8", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 328, + "img_width": 186, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "98": { + "question_id": "98", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the value of the square in the figure.", + "choices": null, + "answer": "2", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning", + "algebraic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "100": { + "question_id": "100", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the ruling group?", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "102": { + "question_id": "102", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The shape is made of unit squares. What is the area of the shape?", + "choices": null, + "answer": "6", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 106, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "104": { + "question_id": "104", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?", + "choices": null, + "answer": "0.8", + "extraction": "0.8", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "106": { + "question_id": "106", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 1?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "108": { + "question_id": "108", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Find out the average of the bottom two countries ??", + "choices": null, + "answer": "51.04", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "110": { + "question_id": "110", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of two lowest bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "112": { + "question_id": "112", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan airliners less than the number of gray shiny utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "114": { + "question_id": "114", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, KL is tangent to $\\odot M$ at K. Find the value of x.\nChoices:\n(A) 6.00\n(B) 9.45\n(C) 18.9\n(D) 37.8", + "choices": [ + "6.00", + "9.45", + "18.9", + "37.8" + ], + "answer": "9.45", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.00", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 273, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "116": { + "question_id": "116", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf has the most veins?\nChoices:\n(A) Acuminate\n(B) Truncate\n(C) Mucronate\n(D) Acute", + "choices": [ + "Acuminate", + "Truncate", + "Mucronate", + "Acute" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Acuminate", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 187, + "img_width": 350, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "118": { + "question_id": "118", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "120": { + "question_id": "120", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 320, + "img_width": 312, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "122": { + "question_id": "122", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow regular buss than small yellow metallic school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "124": { + "question_id": "124", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of leaf arrangement consists of at least three leaves attached to a node.\nChoices:\n(A) Whorled\n(B) Simple\n(C) Opposite\n(D) Alternate", + "choices": [ + "Whorled", + "Simple", + "Opposite", + "Alternate" + ], + "answer": "Whorled", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Whorled", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "126": { + "question_id": "126", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 800, + "img_width": 623, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "128": { + "question_id": "128", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large metal blocks. Subtract all yellow cylinders. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "130": { + "question_id": "130", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1403, + "img_width": 1063, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "132": { + "question_id": "132", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57284\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u5747\u4e3a1\uff0c\u70b9A\uff0cB\uff0cC\u90fd\u5728\u683c\u70b9\u4e0a\uff0cAD\u22a5BC\u4e8eD\uff0c\u5219AD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) \\frac{7}{3}", + "choices": [ + "1", + "1.5", + "2", + "\\frac{7}{3}" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "134": { + "question_id": "134", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People can use the engineering-design process to develop solutions to problems. One step in the process is testing if a potential solution meets the requirements of the design.\nThe passage below describes how the engineering-design process was used to test a solution to a problem. Read the passage. Then answer the question below.\n\nCooper was a landscape architect who was hired to design a new city park. The city council wanted the park to have space for outdoor concerts and to have at least 20% of the park shaded by trees. Cooper thought the concert area should be at least 150 meters from the road so traffic noise didn't interrupt the music. He developed three possible designs for the park with the concert area in a different location in each design. Then, he tested each design by measuring the distance between the road and the concert area.\nFigure: studying an architect's design. Which of the following could Cooper's test show?\nChoices:\n(A) if at least 20% of the park would be shaded by trees in each design\n(B) which design would have the least traffic noise in the concert area\n(C) which design would have the greatest distance between the concert area and the road", + "choices": [ + "if at least 20% of the park would be shaded by trees in each design", + "which design would have the least traffic noise in the concert area", + "which design would have the greatest distance between the concert area and the road" + ], + "answer": "which design would have the greatest distance between the concert area and the road", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "if at least 20% of the park would be shaded by trees in each design", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "high school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "136": { + "question_id": "136", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest value of blue bar?", + "choices": null, + "answer": "64", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 443, + "img_width": 415, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "138": { + "question_id": "138", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sandwich cut in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "140": { + "question_id": "140", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which food has the least carbs?\nChoices:\n(A) soup\n(B) water\n(C) sandwich\n(D) buns", + "choices": [ + "soup", + "water", + "sandwich", + "buns" + ], + "answer": "soup", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "soup", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "142": { + "question_id": "142", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it split in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 425, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "144": { + "question_id": "144", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Natalie buys 4.6 kilograms of turmeric. What is the total cost? (Unit: $)", + "choices": null, + "answer": "13.8", + "extraction": "18.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 162, + "img_width": 210, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "146": { + "question_id": "146", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kimberly's classmates revealed how many science articles they read. What is the range of the numbers?'", + "choices": null, + "answer": "4", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 286, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "148": { + "question_id": "148", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the smallest base?\nChoices:\n(A) Hastate\n(B) Cordate\n(C) Sagittate\n(D) Decurrent", + "choices": [ + "Hastate", + "Cordate", + "Sagittate", + "Decurrent" + ], + "answer": "Decurrent", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Hastate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 161, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "150": { + "question_id": "150", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are three points on \u2299O, and the straight line CD and \u2299O are tangent to point C. If \u2220DCB = 40.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 80\u00b0\n(D) 100\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "80\u00b0", + "100\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 144, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "152": { + "question_id": "152", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u5c06\u542b30\u00b0\u89d2\u7684\u76f4\u89d2\u4e09\u89d2\u677f\u6309\u5982\u56fe\u65b9\u5f0f\u653e\u7f6e\uff0c\u76f4\u89d2\u9876\u70b9\u5728l2\u4e0a\uff0c\u82e5\u22201\uff1d76\u00b0\uff0c\u5219\u22202\uff1d\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 45\u00b0\n(C) 44\u00b0\n(D) 64\u00b0", + "choices": [ + "36\u00b0", + "45\u00b0", + "44\u00b0", + "64\u00b0" + ], + "answer": "44\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 208, + "img_width": 229, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "154": { + "question_id": "154", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an odd function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "156": { + "question_id": "156", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the as x approaches 1 from the left side?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "158": { + "question_id": "158", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 685, + "img_width": 911, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "160": { + "question_id": "160", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 270, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "162": { + "question_id": "162", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The bird watcher counted the number of birds in each flock that passed overhead. How many flocks had at least 17 birds but fewer than 33 birds? (Unit: flocks)", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 202, + "img_width": 117, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "164": { + "question_id": "164", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, CE \u22a5 AB, point E is the foot of perpendicular, if \u2220D = 55.0, then \u2220BCE = ()\nChoices:\n(A) 55\u00b0\n(B) 35\u00b0\n(C) 25\u00b0\n(D) 30\u00b0", + "choices": [ + "55\u00b0", + "35\u00b0", + "25\u00b0", + "30\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 161, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "166": { + "question_id": "166", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Shape is missing?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "B", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 816, + "img_width": 2028, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "168": { + "question_id": "168", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the Hue-Saturation subspace shown in Fig. Q2 is a perfect circle and that colors A, B and C can be represented as the 3 points shown in the subspace. Which color has the smallest saturation coefficient?\nChoices:\n(A) (c)\n(B) (a)\n(C) (e)\n(D) (d)\n(E) (b)", + "choices": [ + "(c)", + "(a)", + "(e)", + "(d)", + "(b)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 454, + "img_width": 414, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "170": { + "question_id": "170", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: f(-1) is ____ f(0).\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "172": { + "question_id": "172", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Dark Salmon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 524, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "174": { + "question_id": "174", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny cyan suvs that are behind the aeroplane than cyan utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "176": { + "question_id": "176", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle.\nChoices:\n(A) 0.5\n(B) 1\n(C) 1.5\n(D) 2", + "choices": [ + "0.5", + "1", + "1.5", + "2" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 292, + "img_width": 305, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "178": { + "question_id": "178", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u5728\u2220FBD\u7684\u4e24\u6761\u8fb9BF\u3001BD\u4e0a\uff0cBE\u5e73\u5206\u2220FBD\uff0cCE\u5e73\u5206\u2220ACD\uff0c\u8fde\u63a5AE\uff0c\u82e5\u2220BEC\uff1d35\u00b0\uff0c\u5219\u2220FAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 45\u00b0\n(C) 55\u00b0\n(D) 65\u00b0", + "choices": [ + "35\u00b0", + "45\u00b0", + "55\u00b0", + "65\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 99, + "img_width": 129, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "180": { + "question_id": "180", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny brown cylinders. Subtract all tiny brown objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "182": { + "question_id": "182", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Yellow?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 589, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "184": { + "question_id": "184", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 0?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "186": { + "question_id": "186", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC \u22a5 CD, BD \u22a5 CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()\nChoices:\n(A) \\frac{20}{3}\n(B) \\frac{10}{3}\n(C) 7\n(D) \\frac{14}{3}", + "choices": [ + "\\frac{20}{3}", + "\\frac{10}{3}", + "7", + "\\frac{14}{3}" + ], + "answer": "\\frac{20}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{20}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "188": { + "question_id": "188", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many methods in the table achieve an A-847 score higher than 20.0?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 634, + "img_width": 2226, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "190": { + "question_id": "190", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 132, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "192": { + "question_id": "192", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the diameter CD of \u2299O crosses the midpoint G of chord EF, \u2220DCF = 20.0, then \u2220EOD is equal to ()\nChoices:\n(A) 10\u00b0\n(B) 20\u00b0\n(C) 40\u00b0\n(D) 80\u00b0", + "choices": [ + "10\u00b0", + "20\u00b0", + "40\u00b0", + "80\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 127, + "img_width": 101, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "194": { + "question_id": "194", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: On average, how many people can commute on this vehicle?", + "choices": null, + "answer": "50", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 408, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "196": { + "question_id": "196", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u6240\u793a\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u5df2\u77e5\u70b9D\uff0cE\uff0cF\u5206\u522b\u4e3a\u8fb9BC\uff0cAD\uff0cCE\u7684\u4e2d\u70b9\uff0c\u4e14S\u25b3ABC\uff1d4cm2\uff0c\u5219S\u25b3DEF\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 2cm2\n(B) 1cm2\n(C) 0.5cm2\n(D) 0.25cm2", + "choices": [ + "2cm2", + "1cm2", + "0.5cm2", + "0.25cm2" + ], + "answer": "0.5cm2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "198": { + "question_id": "198", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the missing value.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 756, + "img_width": 890, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "200": { + "question_id": "200", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 404, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "202": { + "question_id": "202", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "204": { + "question_id": "204", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0627\u0632 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u062a\u0635\u0648\u06cc\u0631 \u062f\u0631\u0628 \u062f\u0648\u0645 \u0686\u0646\u062f \u0634\u06cc\u0634\u0647 \u0628\u062f\u0648\u0646 \u0631\u0646\u06af \u062f\u0627\u0631\u0647\u061f", + "choices": null, + "answer": "12", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 376, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "206": { + "question_id": "206", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the scale factor from $Q$ to $Q'$.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 611, + "img_width": 731, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "208": { + "question_id": "208", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 195, + "img_width": 300, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "210": { + "question_id": "210", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 370, + "img_width": 493, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "212": { + "question_id": "212", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 403, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "214": { + "question_id": "214", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of amount earned from merchandise imports in Canada greater than the average percentage of amount earned from merchandise imports in Canada taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1109, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "216": { + "question_id": "216", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people like the most preferred object in the whole chart?", + "choices": null, + "answer": "90", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "218": { + "question_id": "218", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large red rubber blocks. Subtract all tiny red matte objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "220": { + "question_id": "220", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2299O is the circumscribed circle of the quadrilateral ABCD, if \u2220O = 110.0, then the degree of \u2220C is ()\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 105\u00b0\n(D) 90\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "105\u00b0", + "90\u00b0" + ], + "answer": "125\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "222": { + "question_id": "222", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue shiny spheres. Subtract all big blue shiny cubes. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "224": { + "question_id": "224", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "226": { + "question_id": "226", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "228": { + "question_id": "228", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If \u2220ACE = 25.0, then the degree of \u2220D is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 137, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "230": { + "question_id": "230", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy higher than 9 in at least one dataset?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "232": { + "question_id": "232", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each pink ball represents one particle of solute. Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A", + "choices": [ + "neither; their concentrations are the same", + "Solution B", + "Solution A" + ], + "answer": "Solution B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "234": { + "question_id": "234", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown above, AC = 6. What is the length of segment AB?\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 7\n(E) It cannot be determined from the information given", + "choices": [ + "3", + "5", + "6", + "7", + "It cannot be determined from the information given" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 378, + "img_width": 434, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "236": { + "question_id": "236", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $z$.\nChoices:\n(A) 7\n(B) 9\n(C) 12\n(D) 15", + "choices": [ + "7", + "9", + "12", + "15" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 423, + "img_width": 447, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "238": { + "question_id": "238", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find PT\nChoices:\n(A) 6\n(B) \\frac { 20 } { 3 }\n(C) 7\n(D) 22 / 3", + "choices": [ + "6", + "\\frac { 20 } { 3 }", + "7", + "22 / 3" + ], + "answer": "\\frac { 20 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "240": { + "question_id": "240", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2387, + "img_width": 3500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "242": { + "question_id": "242", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle A$ of quadrilateral ABCD\nChoices:\n(A) 45\n(B) 90\n(C) 135\n(D) 180", + "choices": [ + "45", + "90", + "135", + "180" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 381, + "img_width": 621, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "244": { + "question_id": "244", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 500, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "246": { + "question_id": "246", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. In 1995, the Intergovernmental Panel on Climate Change (IPCC) considered a global average temperature rise of $1.0-3.5^{\\circ} \\mathrm{C}$ likely by the year 2100 , with $2.0^{\\circ} \\mathrm{C}$ its best estimate. Because water vapour is itself a greenhouse gas, the increase in water vapour content of the atmosphere is of some concern to climate change experts. Predict the relative increase in water vapour in the atmosphere based on a temperature rises of $2.0 \\mathrm{~K}$, assuming that the relative humidity remains constant. (The present global mean temperature is $290 \\mathrm{~K}$, and the equilibrium vapour pressure of water at that temperature is 0.0189 bar.)", + "choices": null, + "answer": "13", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 216, + "img_width": 1098, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "248": { + "question_id": "248", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green matte choppers greater than the number of large yellow shiny motorbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "250": { + "question_id": "250", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area $A$ of the shaded region is given. Find $x$. $A = 66$ cm$^2$ .\nChoices:\n(A) 4.6\n(B) 6.5\n(C) 13.0\n(D) 26.0", + "choices": [ + "4.6", + "6.5", + "13.0", + "26.0" + ], + "answer": "13.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 286, + "img_width": 303, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "252": { + "question_id": "252", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Consider the infinitely long chain of resistors shown below. What is the resistance between terminals a and b if R=1?", + "choices": null, + "answer": "0.73", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 169, + "img_width": 463, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "254": { + "question_id": "254", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big objects that are in front of the metal fighter less than the number of things that are behind the big metallic bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "256": { + "question_id": "256", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAD\u5e73\u5206\u2220BAC\uff0cAD\u4ea4BC\u4e8e\u70b9D\uff0cDE\u22a5AB\uff0c\u5782\u8db3\u4e3aE\uff0c\u82e5DE\uff1d3\uff0cAC\uff1d4\uff0c\u5219\u25b3ADC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "258": { + "question_id": "258", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An employee at the craft store counted the number of red buttons in each bag of mixed buttons. How many bags had at least 60 red buttons but fewer than 81 red buttons?'", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 156, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "260": { + "question_id": "260", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the derivative of the function positive between [1, 2] assuming that it's differentiable?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 368, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "262": { + "question_id": "262", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between genres of tv shows watched by highest female and lowest female?", + "choices": null, + "answer": "39", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 756, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "264": { + "question_id": "264", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For Group C, in which week is the cumulative increase in weight , the highest?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "266": { + "question_id": "266", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has the most uneven shape?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) truncate", + "choices": [ + "oblique", + "obtuse", + "cordate", + "truncate" + ], + "answer": "oblique", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "oblique", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 240, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "268": { + "question_id": "268", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Colton wants to buy 1+3/10 kilograms of English muffins. How much will he spend? (Unit: $)", + "choices": null, + "answer": "10.4", + "extraction": "12.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "270": { + "question_id": "270", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A and B are three points on \u2299O and AB = AC. Connect BO and CO, if \u2220ABC = 65.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 50\u00b0\n(B) 65\u00b0\n(C) 100\u00b0\n(D) 130\u00b0", + "choices": [ + "50\u00b0", + "65\u00b0", + "100\u00b0", + "130\u00b0" + ], + "answer": "100\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "272": { + "question_id": "272", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:30\n(B) 1:30\n(C) 4:30\n(D) 5:30\n(E) 11:30", + "choices": [ + "9:30", + "1:30", + "4:30", + "5:30", + "11:30" + ], + "answer": "4:30", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9:30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 261, + "img_width": 261, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "274": { + "question_id": "274", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u3001BC\u3001CD\u3001DA\u90fd\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5df2\u77e5AD\uff1d2\uff0cBC\uff1d5\uff0c\u5219AB+CD\u7684\u503c\u662f\uff08\uff09\nChoices:\n(A) 14\n(B) 12\n(C) 9\n(D) 7", + "choices": [ + "14", + "12", + "9", + "7" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "276": { + "question_id": "276", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that the radius of \u2299O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()\nChoices:\n(A) 1mm\n(B) 2mm\n(C) 3mm\n(D) 4mm", + "choices": [ + "1mm", + "2mm", + "3mm", + "4mm" + ], + "answer": "3mm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1mm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 102, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "278": { + "question_id": "278", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following objects, which one has the best PSNR score?\nChoices:\n(A) Lego\n(B) Mats\n(C) Mic\n(D) Ship", + "choices": [ + "Lego", + "Mats", + "Mic", + "Ship" + ], + "answer": "Mic", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lego", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 940, + "img_width": 1478, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "280": { + "question_id": "280", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, ABCDEF is a regular hexagon, and its center is point O. What is the value of x?\nChoices:\n(A) 80\n(B) 60\n(C) 40\n(D) 30\n(E) 20", + "choices": [ + "80", + "60", + "40", + "30", + "20" + ], + "answer": "60", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "282": { + "question_id": "282", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of the sun is showing?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "284": { + "question_id": "284", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "286": { + "question_id": "286", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5c06\u4e00\u6839\u957f\u5ea6\u4e3a8cm\uff0c\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u76ae\u7b4b\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53473cm\u5230\u70b9D\uff0c\u5219\u6b64\u65f6\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 6cm\n(B) 5cm\n(C) 4cm\n(D) 2cm", + "choices": [ + "6cm", + "5cm", + "4cm", + "2cm" + ], + "answer": "2cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "288": { + "question_id": "288", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following value ranges of \u03bb2 does the percentage of Attack Effectiveness begin to be lower than that of Diversity?\nChoices:\n(A) 0.0 - 0.2\n(B) 0.2 - 0.4\n(C) 0.4 - 0.6\n(D) 0.6 - 0.8\n(E) 0.8 - 1.0", + "choices": [ + "0.0 - 0.2", + "0.2 - 0.4", + "0.4 - 0.6", + "0.6 - 0.8", + "0.8 - 1.0" + ], + "answer": "0.0 - 0.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.0 - 0.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 606, + "img_width": 2144, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "290": { + "question_id": "290", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5e73\u884c\u7ebfAB\uff0cCD\u88ab\u76f4\u7ebfAE\u6240\u622a\uff0e\u82e5\u22201\uff1d105\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 75\u00b0\n(B) 85\u00b0\n(C) 95\u00b0\n(D) 105\u00b0", + "choices": [ + "75\u00b0", + "85\u00b0", + "95\u00b0", + "105\u00b0" + ], + "answer": "75\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 132, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "292": { + "question_id": "292", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rebecca Purple greater than Olive Drab?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 461, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "294": { + "question_id": "294", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: In Fig. 21-25, the particles have charges $q_1=-q_2=100 \\mathrm{nC}$ and $q_3=-q_4=200 \\mathrm{nC}$, and distance $a=$ $5.0 \\mathrm{~cm}$. What is the $x$ component of the net electrostatic force on particle 3?", + "choices": null, + "answer": "0.17", + "extraction": "-0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 293, + "img_width": 247, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "296": { + "question_id": "296", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of f(-3) is ____ the value of f(2)\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "298": { + "question_id": "298", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A decrease in rabbits would affect whose food source?\nChoices:\n(A) mountain lion\n(B) producer\n(C) decomposer\n(D) energy", + "choices": [ + "mountain lion", + "producer", + "decomposer", + "energy" + ], + "answer": "mountain lion", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mountain lion", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 699, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "300": { + "question_id": "300", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{HK}$ and $\\overline{IG}$ are diameters of $\\odot L$. Find $m \\widehat {IHJ}$.\nChoices:\n(A) 59\n(B) 135\n(C) 270\n(D) 301", + "choices": [ + "59", + "135", + "270", + "301" + ], + "answer": "270", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "59", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 492, + "img_width": 510, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "302": { + "question_id": "302", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green curve?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a logarithmic function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 300, + "img_width": 531, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "304": { + "question_id": "304", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In the figure above, two line segments meet at a point on line l. If the value of y is equal to the square of the value of x, what is the value of y?", + "choices": null, + "answer": "100", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 431, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "306": { + "question_id": "306", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the bed much larger than the kitten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "308": { + "question_id": "308", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "310": { + "question_id": "310", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z\nChoices:\n(A) 10\n(B) \\frac { 32 } { 3 }\n(C) \\frac { 40 } { 3 }\n(D) \\frac { 50 } { 3 }", + "choices": [ + "10", + "\\frac { 32 } { 3 }", + "\\frac { 40 } { 3 }", + "\\frac { 50 } { 3 }" + ], + "answer": "\\frac { 40 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 218, + "img_width": 350, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "312": { + "question_id": "312", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2003, which crop cost the most per cwt?'\nChoices:\n(A) potatoes\n(B) peas\n(C) apples\n(D) canola", + "choices": [ + "potatoes", + "peas", + "apples", + "canola" + ], + "answer": "apples", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "potatoes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 204, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "314": { + "question_id": "314", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 522, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "316": { + "question_id": "316", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, given that points A, B, and C are on \u2299O, \u2220AOB = 100.0, then the degree of \u2220ACB is ()\nChoices:\n(A) 50\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 200\u00b0", + "choices": [ + "50\u00b0", + "80\u00b0", + "100\u00b0", + "200\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 105, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "318": { + "question_id": "318", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the figure. Round to the nearest tenth if necessary.\nChoices:\n(A) 191.5\n(B) 1128\n(C) 2256\n(D) 4512", + "choices": [ + "191.5", + "1128", + "2256", + "4512" + ], + "answer": "2256", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "191.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 175, + "img_width": 239, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "320": { + "question_id": "320", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0cAB\uff1d13\uff0cAC\uff1d5\uff0cD\u3001E\u5206\u522b\u662fAC\u3001AB\u7684\u4e2d\u70b9\uff0c\u5219DE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 6.5\n(B) 6\n(C) 5.5\n(D) \\frac{\u221a{119}}{2}", + "choices": [ + "6.5", + "6", + "5.5", + "\\frac{\u221a{119}}{2}" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 90, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "322": { + "question_id": "322", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cA\uff0cB\u4e24\u70b9\u88ab\u6c60\u5858\u9694\u5f00\uff0c\u5728AB\u5916\u9009\u4e00\u70b9C\uff0c\u4f7f\u70b9C\u80fd\u76f4\u63a5\u5230\u8fbe\u70b9A\u548c\u70b9B\uff0c\u8fde\u63a5AC\u548cBC\uff0c\u5e76\u5206\u522b\u627e\u51faAC\u548cBC\u7684\u4e2d\u70b9M\uff0cN\uff0e\u5982\u679c\u6d4b\u5f97MN\uff1d20m\uff0c\u90a3\u4e48A\uff0cB\u4e24\u70b9\u7684\u8ddd\u79bb\u662f\uff08\uff09\nChoices:\n(A) 10m\n(B) 20m\n(C) 35m\n(D) 40m", + "choices": [ + "10m", + "20m", + "35m", + "40m" + ], + "answer": "40m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "324": { + "question_id": "324", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between highest and lowest value of dark blue bar?", + "choices": null, + "answer": "53", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 726, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "326": { + "question_id": "326", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 170, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "328": { + "question_id": "328", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm candy for all the datasets?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "330": { + "question_id": "330", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny cubes. Subtract all brown balls. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "332": { + "question_id": "332", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A taxi cab driver tracked how many miles he drove each month. How many miles did the taxi cab driver drive in total in January and April? (Unit: miles)", + "choices": null, + "answer": "7873", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 125, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "334": { + "question_id": "334", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metal tandem bikes in front of the small yellow metallic bicycle than metal bicycles on the left side of the large brown jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "336": { + "question_id": "336", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "338": { + "question_id": "338", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC above, AB = AC, E is the midpoint of line AB, and D is the midpoint of line AC. If AE = x and ED = 4, what is length BC?\nChoices:\n(A) 6\n(B) 8\n(C) 2*x\n(D) 4*x\n(E) 4*x^2", + "choices": [ + "6", + "8", + "2*x", + "4*x", + "4*x^2" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 167, + "img_width": 121, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "340": { + "question_id": "340", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following domains has the most number of BPE Tokens?\nChoices:\n(A) Legal \n(B) Code \n(C) Conversational \n(D) Math \n(E) Science\n(F) Books \n(G) News \n(H) Encyclopedic", + "choices": [ + "Legal ", + "Code ", + "Conversational ", + "Math ", + "Science", + "Books ", + "News ", + "Encyclopedic" + ], + "answer": "Science", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Legal ", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1176, + "img_width": 2142, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "342": { + "question_id": "342", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, which of the following is the greatest?\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d\n(E) e", + "choices": [ + "a", + "b", + "c", + "d", + "e" + ], + "answer": "d", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 299, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "344": { + "question_id": "344", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal cars that are left of the tiny matte school bus greater than the number of tiny cyan double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "346": { + "question_id": "346", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the y-intercept of this function?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 339, + "img_width": 341, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "348": { + "question_id": "348", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the pieces in triangle cuts?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "350": { + "question_id": "350", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 89, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "352": { + "question_id": "352", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people will fit in the smaller vehicle?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "354": { + "question_id": "354", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 90?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "356": { + "question_id": "356", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big motorbikes than rubber choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "358": { + "question_id": "358", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is the same as the unfolded cube?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 517, + "img_width": 326, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "360": { + "question_id": "360", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $\\frac{I J}{X J}=\\frac{HJ}{YJ}, m \\angle W X J=130$\r\nand $m \\angle WZG=20,$ find $m \\angle YIZ$\nChoices:\n(A) 40\n(B) 50\n(C) 65\n(D) 110", + "choices": [ + "40", + "50", + "65", + "110" + ], + "answer": "50", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 370, + "img_width": 721, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "362": { + "question_id": "362", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan cylinders. Subtract all tiny purple rubber objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "364": { + "question_id": "364", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and points C and D are on \u2299O. If \u2220ABD = 50.0, then the degree of \u2220BCD is ()\nChoices:\n(A) 30\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "30\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "366": { + "question_id": "366", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 320, + "img_width": 250, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "368": { + "question_id": "368", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow matte school buss greater than the number of big yellow metal cars?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "370": { + "question_id": "370", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, if the number of ferns decrease, the supply of salmon will most likely?\nChoices:\n(A) decrease\n(B) can't tell\n(C) stay same\n(D) increase", + "choices": [ + "decrease", + "can't tell", + "stay same", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 680, + "img_width": 880, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "372": { + "question_id": "372", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small gray spheres. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "374": { + "question_id": "374", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms calf and ivory?", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "376": { + "question_id": "376", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte cubes. Subtract all tiny gray metal cubes. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "378": { + "question_id": "378", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAD\u662f\u25b3ABC\u7684\u4e2d\u7ebf\uff0cE\u4e3aAD\u7684\u4e2d\u70b9\uff0c\u25b3ABE\u7684\u9762\u79ef\u4e3a2\uff0c\u5219\u25b3ABC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8", + "choices": [ + "5", + "6", + "7", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 118, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "380": { + "question_id": "380", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For how many years that the percentage value over 4?", + "choices": null, + "answer": "6", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "382": { + "question_id": "382", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the building through the window at least five stories tall?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 400, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "384": { + "question_id": "384", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 495, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "386": { + "question_id": "386", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 5\n(B) 10\n(C) 10 \\sqrt { 3 }\n(D) 20", + "choices": [ + "5", + "10", + "10 \\sqrt { 3 }", + "20" + ], + "answer": "10 \\sqrt { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 164, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "388": { + "question_id": "388", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Express the ratio of $\\tan M$ as a decimal to the nearest hundredth.\nChoices:\n(A) 0.38\n(B) 0.42\n(C) 0.92\n(D) 2.40", + "choices": [ + "0.38", + "0.42", + "0.92", + "2.40" + ], + "answer": "0.42", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.38", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 209, + "img_width": 342, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "390": { + "question_id": "390", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer jets that are left of the small brown suv than objects right of the big shiny car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "392": { + "question_id": "392", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mr. Huffman, a P.E. teacher, wrote down how much weight each of his students could lift. How many people lifted at least 46 pounds? (Unit: people)", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "394": { + "question_id": "394", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following environments has the least GPU days for training?\nChoices:\n(A) HomeGrid\n(B) Msgr S1\n(C) Msgr S2\n(D) Msgr S3\n(E) VLN\n(F) LangRoom", + "choices": [ + "HomeGrid", + "Msgr S1", + "Msgr S2", + "Msgr S3", + "VLN", + "LangRoom" + ], + "answer": "LangRoom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "HomeGrid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 858, + "img_width": 1854, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "396": { + "question_id": "396", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the algae dies then water flea population will\nChoices:\n(A) remains the same\n(B) decrease\n(C) increase\n(D) NA", + "choices": [ + "remains the same", + "decrease", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 576, + "img_width": 720, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "398": { + "question_id": "398", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 942, + "img_width": 727, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "400": { + "question_id": "400", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which Episode ID does the Retroformer attain its peak Success rate (%)?\nChoices:\n(A) 1.0\n(B) 1.5\n(C) 2.0\n(D) 2.5\n(E) 3.0\n(F) 3.5\n(G) 4.0", + "choices": [ + "1.0", + "1.5", + "2.0", + "2.5", + "3.0", + "3.5", + "4.0" + ], + "answer": "4.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 942, + "img_width": 1196, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "402": { + "question_id": "402", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food chain diagram below, which animal would most directly lack food if Grasshoppers get exterminated?\nChoices:\n(A) Rabbit\n(B) Deer\n(C) Frogs\n(D) Wolf", + "choices": [ + "Rabbit", + "Deer", + "Frogs", + "Wolf" + ], + "answer": "Frogs", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rabbit", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 735, + "img_width": 909, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "404": { + "question_id": "404", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which activity begins at 11.50 A.M.?'\nChoices:\n(A) figure skating practice\n(B) private class\n(C) adult class\n(D) children's class", + "choices": [ + "figure skating practice", + "private class", + "adult class", + "children's class" + ], + "answer": "children's class", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "figure skating practice", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 217, + "img_width": 325, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "406": { + "question_id": "406", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many snowmen are there?", + "choices": null, + "answer": "15", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 183, + "img_width": 714, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "408": { + "question_id": "408", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 6\n(B) 6 \\sqrt { 2 }\n(C) 6 \\sqrt { 3 }\n(D) 6 \\sqrt { 5 }", + "choices": [ + "6", + "6 \\sqrt { 2 }", + "6 \\sqrt { 3 }", + "6 \\sqrt { 5 }" + ], + "answer": "6 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 238, + "img_width": 362, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "410": { + "question_id": "410", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of $\\triangle D E F,$ if $\\triangle D E F \\sim \\triangle C B F,$ perimeter of $\\triangle C B F=27, D F=6,$ and $F C=8$\nChoices:\n(A) 20.25\n(B) 21\n(C) 27\n(D) 36", + "choices": [ + "20.25", + "21", + "27", + "36" + ], + "answer": "20.25", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20.25", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 226, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "412": { + "question_id": "412", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tanner has $35. Does he have enough to buy a black jacket and a pair of shorts?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 192, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "414": { + "question_id": "414", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $ST=8, TR=4$, and $PT=6$, find $QR$.\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 10", + "choices": [ + "6", + "8", + "9", + "10" + ], + "answer": "9", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 386, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "416": { + "question_id": "416", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest volume written on the blender?", + "choices": null, + "answer": "800", + "extraction": "1600", + "prediction": "1600", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "418": { + "question_id": "418", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the number of grasshoppers decreases, what will the population of spiders most likely do?\nChoices:\n(A) remain the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remain the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "420": { + "question_id": "420", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the lowest value on the Y axis?", + "choices": null, + "answer": "0.0", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 1763, + "img_width": 2256, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "422": { + "question_id": "422", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "10", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "424": { + "question_id": "424", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the food half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "426": { + "question_id": "426", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u82e5DE\u662f\u25b3ABC\u7684\u4e2d\u4f4d\u7ebf\uff0c\u25b3ADE\u7684\u5468\u957f\u4e3a1\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "428": { + "question_id": "428", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "28", + "extraction": "30", + "prediction": "30", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 968, + "img_width": 1259, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "430": { + "question_id": "430", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=0 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "432": { + "question_id": "432", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of undernourished male children greater than 0.4 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1085, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "434": { + "question_id": "434", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, side AC of triangle ABC is on line l. What is x in terms of k?\nChoices:\n(A) 60-k\n(B) k\n(C) 60+k\n(D) 120-k\n(E) 120-2*k", + "choices": [ + "60-k", + "k", + "60+k", + "120-k", + "120-2*k" + ], + "answer": "60-k", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60-k", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 157, + "img_width": 215, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "436": { + "question_id": "436", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "438": { + "question_id": "438", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 367, + "img_width": 329, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "440": { + "question_id": "440", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the white plate half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "442": { + "question_id": "442", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "444": { + "question_id": "444", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the two genders?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "446": { + "question_id": "446", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u662f\u25b3ABC\u7684\u5185\u5fc3\uff0c\u8fde\u63a5DB\uff0cDC\uff0c\u8fc7\u70b9D\u4f5cEF\u2225BC\u5206\u522b\u4ea4AB\u3001AC\u4e8e\u70b9E\u3001F\uff0c\u82e5BE+CF\uff1d8\uff0c\u5219EF\u7684\u957f\u5ea6\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 8\n(D) 16", + "choices": [ + "4", + "5", + "8", + "16" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "448": { + "question_id": "448", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year recorded the highest share of Urban secondary schools with access to electricity in India?", + "choices": null, + "answer": "2016", + "extraction": "2012", + "prediction": "2012", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "450": { + "question_id": "450", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the grass died, what would be most affected?\nChoices:\n(A) garter snakes\n(B) hognose snakes\n(C) hawks\n(D) grasshoppers", + "choices": [ + "garter snakes", + "hognose snakes", + "hawks", + "grasshoppers" + ], + "answer": "grasshoppers", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "garter snakes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "452": { + "question_id": "452", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most likely equilibrium population count?\nChoices:\n(A) 40\n(B) 60\n(C) 80\n(D) 100", + "choices": [ + "40", + "60", + "80", + "100" + ], + "answer": "80", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 366, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "454": { + "question_id": "454", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "456": { + "question_id": "456", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "458": { + "question_id": "458", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: If you add the two visible numbers, on the jerseys, what is the total sum?", + "choices": null, + "answer": "3", + "extraction": "23", + "prediction": "23", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "460": { + "question_id": "460", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were fewer leaves in this ecosystem, the first organism to experience change as a result would be:\nChoices:\n(A) Frogs\n(B) Crickets\n(C) Snakes\n(D) Hawks", + "choices": [ + "Frogs", + "Crickets", + "Snakes", + "Hawks" + ], + "answer": "Crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Frogs", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "462": { + "question_id": "462", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 100?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "464": { + "question_id": "464", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1316, + "img_width": 1000, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "466": { + "question_id": "466", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Periwinkle intersect Yellow Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 487, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "468": { + "question_id": "468", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "470": { + "question_id": "470", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following models has the lowest KS Rollout Loss overall?\nChoices:\n(A) Baseline\n(B) Diffusion\n(C) PDE-Refiner\n(D) Pushforward", + "choices": [ + "Baseline", + "Diffusion", + "PDE-Refiner", + "Pushforward" + ], + "answer": "PDE-Refiner", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Baseline", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 854, + "img_width": 1422, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "472": { + "question_id": "472", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "474": { + "question_id": "474", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "476": { + "question_id": "476", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If frogs were removed from this environment what animal would potentially see an increase in its population?\nChoices:\n(A) crickets\n(B) deer\n(C) snakes\n(D) hawks", + "choices": [ + "crickets", + "deer", + "snakes", + "hawks" + ], + "answer": "crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "crickets", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 518, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "478": { + "question_id": "478", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 5", + "choices": [ + "10", + "8", + "6", + "5" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "480": { + "question_id": "480", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny blue metal bicycles behind the small sedan less than the number of purple fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "482": { + "question_id": "482", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, triangle ABC is inscribed in the circle with center O and diameter AC. If AB = AO, what is the degree measure of angle ABO?\nChoices:\n(A) 15*\\degree\n(B) 30*\\degree\n(C) 45*\\degree\n(D) 60*\\degree\n(E) 90*\\degree", + "choices": [ + "15*\\degree", + "30*\\degree", + "45*\\degree", + "60*\\degree", + "90*\\degree" + ], + "answer": "60*\\degree", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15*\\degree", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 134, + "img_width": 143, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "484": { + "question_id": "484", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "486": { + "question_id": "486", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1d5\uff0cAD\uff1d7\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 12\n(B) 14\n(C) 35\n(D) 24", + "choices": [ + "12", + "14", + "35", + "24" + ], + "answer": "24", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "488": { + "question_id": "488", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown things. Subtract all tiny blue metallic objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "490": { + "question_id": "490", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u3001B\u5728\u540c\u4e00\u76f4\u7ebf\u4e0a\uff0cDC\u22a5EC\uff0c\u82e5\u2220BCD\uff1d40\u00b0\uff0c\u5219\u2220ACE\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 88, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "492": { + "question_id": "492", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the \u2299O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. Connect AD, given that \u2220DAC = 30.0, the length of the line segment CD is ()\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "1", + "\u221a{3}", + "2", + "2\u221a{3}" + ], + "answer": "2\u221a{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 158, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "494": { + "question_id": "494", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "496": { + "question_id": "496", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "498": { + "question_id": "498", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the water half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 478, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "500": { + "question_id": "500", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1236, + "img_width": 987, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "502": { + "question_id": "502", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tandem bikes that are behind the brown metal bicycle than matte trucks on the left side of the green object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "504": { + "question_id": "504", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, D and E are the points on the edges AB and AC of \u25b3ABC, DE \u2225 BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4", + "choices": [ + "10", + "8", + "6", + "4" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "506": { + "question_id": "506", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?", + "choices": null, + "answer": "[2014, 2016]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "508": { + "question_id": "508", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The owner of a bed and breakfast inn recalled how many guests the inn had hosted each day. What is the median of the numbers?'", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "510": { + "question_id": "510", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{5}", + "choices": [ + "\\frac{2}{3}", + "\\frac{3}{5}", + "\\frac{3}{4}", + "\\frac{4}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 186, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "512": { + "question_id": "512", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the y coordinate of the center of mass of the isosceles right triangle of uniform areal density shown in Figure 9-C?", + "choices": null, + "answer": "0.24", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 356, + "img_width": 497, + "language": "english", + "skills": [ + "geometry reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "514": { + "question_id": "514", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you wanted the leaf with the least main veins, which would you choose?\nChoices:\n(A) 3 main veins\n(B) pinnate\n(C) reticulate\n(D) palmate", + "choices": [ + "3 main veins", + "pinnate", + "reticulate", + "palmate" + ], + "answer": "3 main veins", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3 main veins", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 236, + "img_width": 559, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "516": { + "question_id": "516", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most the stepping stones square?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 339, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "518": { + "question_id": "518", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2211, + "img_width": 2838, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "520": { + "question_id": "520", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 741, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "522": { + "question_id": "522", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 86, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "524": { + "question_id": "524", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The Kingwood Ski Resort asked its guests how many times they went sledding last winter. How many guests went sledding more than 2 times?'", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 163, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "526": { + "question_id": "526", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn", + "choices": [ + "slide", + "flip", + "turn" + ], + "answer": "slide", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "slide", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 104, + "img_width": 253, + "language": "english", + "skills": [ + "geometry reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "528": { + "question_id": "528", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBD\u22a5CF\uff0c\u5782\u8db3\u4e3aB\uff0c\u2220ABF\uff1d35\u00b0\uff0c\u5219\u2220BDC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 45\u00b0\n(D) 55\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "45\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 135, + "img_width": 194, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "530": { + "question_id": "530", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The advertising agency counted the number of billboards in each city in the state. How many cities have fewer than 70 billboards? (Unit: cities)", + "choices": null, + "answer": "9", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 140, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "532": { + "question_id": "532", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer gray trucks that are in front of the large aeroplane than big yellow metal objects in front of the purple object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "534": { + "question_id": "534", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of stunted female children greater than the average percentage of stunted female children taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 883, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "536": { + "question_id": "536", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are on \u2299O, if \u2220C = 35.0, then \u2220AOB = ()\nChoices:\n(A) 17.5\u00b0\n(B) 35\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "17.5\u00b0", + "35\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "17.5\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "538": { + "question_id": "538", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. If AB = 6.0, the area of \u200b\u200bthe ring is ()\nChoices:\n(A) 9\u03c0\n(B) 6\u03c0\n(C) 3\u03c0\n(D) \u03c0", + "choices": [ + "9\u03c0", + "6\u03c0", + "3\u03c0", + "\u03c0" + ], + "answer": "9\u03c0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9\u03c0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "540": { + "question_id": "540", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5", + "choices": [ + "3/11", + "8/11", + "6/11", + "3/5" + ], + "answer": "3/11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3/11", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 103, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "542": { + "question_id": "542", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the figure achieve an Acc score greater than 60?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scatter plot", + "grade": "college", + "img_height": 1358, + "img_width": 1690, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "544": { + "question_id": "544", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total percentage of people who say that they do either less or more often than the usual amount of exercise during the coronavirus pandemic in the United States as of April 2020?", + "choices": null, + "answer": "44", + "extraction": "77", + "prediction": "77", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "546": { + "question_id": "546", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the overall ratio of male to female?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "548": { + "question_id": "548", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer cyan jets than big buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "550": { + "question_id": "550", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with highest accuracy?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "552": { + "question_id": "552", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many queries have a p-value lower than 0.50?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 330, + "img_width": 1726, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "554": { + "question_id": "554", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 488, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "556": { + "question_id": "556", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large red metallic things that are on the left side of the cyan shiny scooter than things that are in front of the small jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "558": { + "question_id": "558", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "560": { + "question_id": "560", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "562": { + "question_id": "562", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green cubes. Subtract all large cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "564": { + "question_id": "564", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest time required to import ?", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1056, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "566": { + "question_id": "566", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u25b3ABC\u224c\u25b3DEF\uff0cCD\u5e73\u5206\u2220BCA\uff0c\u82e5\u2220A\uff1d22\u00b0\uff0c\u2220CGF\uff1d88\u00b0\uff0c\u5219\u2220E\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 26\u00b0\n(B) 28\u00b0\n(C) 30\u00b0\n(D) 34\u00b0", + "choices": [ + "26\u00b0", + "28\u00b0", + "30\u00b0", + "34\u00b0" + ], + "answer": "26\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 89, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "568": { + "question_id": "568", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an economics project, Colleen determined the cost of ferry rides for bicycles and cars. How much higher is the fare for a car on the Mukilteu-Clinton ferry than on the Southport-Fort Fisher ferry? (Unit: $)", + "choices": null, + "answer": "2", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 349, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "570": { + "question_id": "570", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte blocks. Subtract all brown things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "572": { + "question_id": "572", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function start decreasing?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 316, + "img_width": 400, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "574": { + "question_id": "574", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do you see the figures inside these boxes? They form a pattern. Choose the figure in the answer row below that continues the pattern.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5", + "choices": [ + "1", + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 378, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "576": { + "question_id": "576", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human brain is the largest and most anterior part of each cerebral hemisphere?\nChoices:\n(A) motor cortex\n(B) occipital lobe\n(C) temporal lobe\n(D) frontal lobe", + "choices": [ + "motor cortex", + "occipital lobe", + "temporal lobe", + "frontal lobe" + ], + "answer": "frontal lobe", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "motor cortex", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 625, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "578": { + "question_id": "578", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9567", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "580": { + "question_id": "580", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 650, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "582": { + "question_id": "582", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Rebecca Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "584": { + "question_id": "584", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A philanthropic organization compared the amounts of money that its members donated to certain causes. Who donated more money to arts education, Aubrey or Connor?'\nChoices:\n(A) Connor\n(B) Aubrey", + "choices": [ + "Connor", + "Aubrey" + ], + "answer": "Connor", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Connor", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 391, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "586": { + "question_id": "586", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, rotate \u25b3ABC clockwise around point A by 90.0 to obtain \u25b3AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. If \u2220CC\u2032B\u2032 = 32.0, then the size of \u2220AC\u2032B\u2032 is ()\nChoices:\n(A) 32\u00b0\n(B) 45\u00b0\n(C) 13\u00b0\n(D) 30\u00b0", + "choices": [ + "32\u00b0", + "45\u00b0", + "13\u00b0", + "30\u00b0" + ], + "answer": "13\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 80, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "588": { + "question_id": "588", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has more actual total income?", + "choices": null, + "answer": "1982", + "extraction": "1970", + "prediction": "1970", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2281, + "img_width": 1785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "590": { + "question_id": "590", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 264, + "img_width": 376, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "592": { + "question_id": "592", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the global maximum of this function?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 318, + "img_width": 283, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "594": { + "question_id": "594", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the expenditure per student in Jamaica have the greatest increase?", + "choices": null, + "answer": "2005", + "extraction": "2011", + "prediction": "2011", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "596": { + "question_id": "596", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 407, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "598": { + "question_id": "598", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most curved beak species?\nChoices:\n(A) iiki\n(B) swallow-tanager\n(C) cliff swallow\n(D) hawfinch", + "choices": [ + "iiki", + "swallow-tanager", + "cliff swallow", + "hawfinch" + ], + "answer": "iiki", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "iiki", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 463, + "img_width": 593, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "600": { + "question_id": "600", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 637, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "602": { + "question_id": "602", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Rectangle ABCD is subdivided into two identical square regions, as in the figure above. If the area of each square is 9, what is the perimeter of ABCD?", + "choices": null, + "answer": "18", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 219, + "img_width": 435, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "604": { + "question_id": "604", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "606": { + "question_id": "606", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 209, + "img_width": 335, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "608": { + "question_id": "608", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does South Carolina have the highest value in the South ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "610": { + "question_id": "610", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, P, Q, and R lie on the same line. P is the center of the larger circle, and Q is the center of the smaller circle. If the radius of the larger circle is 4, what is the radius of the smaller circle?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 8\n(E) 16", + "choices": [ + "1", + "2", + "4", + "8", + "16" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 411, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "612": { + "question_id": "612", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal things. Subtract all tiny objects. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "614": { + "question_id": "614", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 661, + "img_width": 915, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "616": { + "question_id": "616", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of instagram to google?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "618": { + "question_id": "618", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "620": { + "question_id": "620", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "622": { + "question_id": "622", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cD\u662fBC\u4e0a\u7684\u70b9\uff0c\u4e14BD\uff1d2\uff0cDC\uff1d1\uff0cS\u25b3ACD\uff1d12\uff0c\u90a3\u4e48S\u25b3ABC\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 30\n(B) 36\n(C) 72\n(D) 24", + "choices": [ + "30", + "36", + "72", + "24" + ], + "answer": "36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 146, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "624": { + "question_id": "624", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the total unemployed labor force in Upper middle income greater than 1.6 %?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1344, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "626": { + "question_id": "626", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown objects. Subtract all large purple cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "628": { + "question_id": "628", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u2220ABC\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9E\uff0c\u2220BCD\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9F\uff0c\u82e5AB\uff1d3\uff0cAD\uff1d4\uff0c\u5219EF\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.5\n(D) 3", + "choices": [ + "1", + "2", + "2.5", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "630": { + "question_id": "630", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the size of angle MBD in the figure below.", + "choices": null, + "answer": "72", + "extraction": "62", + "prediction": "62", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 195, + "img_width": 340, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "632": { + "question_id": "632", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total value of the More bar?", + "choices": null, + "answer": "52", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 350, + "img_width": 309, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "634": { + "question_id": "634", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfAB\uff0cCD\u4ea4\u4e8e\u70b9O\uff0e\u5c04\u7ebfOE\u5e73\u5206\u2220BOC\uff0c\u82e5\u2220AOD\uff1d70\u00b0\uff0c\u5219\u2220AOE\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 110\u00b0\n(C) 135\u00b0\n(D) 145\u00b0", + "choices": [ + "35\u00b0", + "110\u00b0", + "135\u00b0", + "145\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 173, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "636": { + "question_id": "636", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "34", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 92, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "638": { + "question_id": "638", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the under-5 male mortality rate greater than the average under-5 male mortality rate taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 880, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "640": { + "question_id": "640", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $\\widehat{\\mathrm{WN}}$ if $\\triangle \\mathrm{IWN}$ is equilateral and $W N=5$\nChoices:\n(A) \\frac { 3 } { 5 } \\pi\n(B) \\frac { 5 } { 3 } \\pi\n(C) 5 \\pi\n(D) 10 \\pi", + "choices": [ + "\\frac { 3 } { 5 } \\pi", + "\\frac { 5 } { 3 } \\pi", + "5 \\pi", + "10 \\pi" + ], + "answer": "\\frac { 5 } { 3 } \\pi", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 3 } { 5 } \\pi", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 222, + "img_width": 309, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "642": { + "question_id": "642", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Line AB is tangent to circle O. If AB = 8 and OB = 10, find the diameter of the circle.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10\n(E) 12", + "choices": [ + "4", + "6", + "8", + "10", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 443, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "644": { + "question_id": "644", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing number in the picture?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 11", + "choices": [ + "6", + "8", + "10", + "11" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 452, + "img_width": 494, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "646": { + "question_id": "646", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The employee at the department store counted the number of ties on each tie rack. How many racks have at least 0 ties? (Unit: racks)", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "648": { + "question_id": "648", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the minimum value of this function?", + "choices": null, + "answer": "-1", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "650": { + "question_id": "650", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of maximum employment rate and minimum employment?", + "choices": null, + "answer": "31.3", + "extraction": "11.1", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "652": { + "question_id": "652", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 365, + "img_width": 845, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "654": { + "question_id": "654", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metallic motorbikes that are in front of the small brown metal dirtbike than big yellow dirtbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "656": { + "question_id": "656", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 776, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "658": { + "question_id": "658", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 115, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "660": { + "question_id": "660", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small fighters than yellow matte tandem bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "662": { + "question_id": "662", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "80", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "664": { + "question_id": "664", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number of responses for Question 10, for any given % of inside sales?", + "choices": null, + "answer": "17", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2245, + "img_width": 1692, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "666": { + "question_id": "666", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red objects. Subtract all big green things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "668": { + "question_id": "668", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "670": { + "question_id": "670", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which date of Meeting was the most number of shares transferred?\nChoices:\n(A) 04/06/2005\n(B) 04/02/2005\n(C) 04/05/2005\n(D) 04/03/2005\n(E) 04/04/2005", + "choices": [ + "04/06/2005", + "04/02/2005", + "04/05/2005", + "04/03/2005", + "04/04/2005" + ], + "answer": "04/02/2005", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "04/06/2005", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2135, + "img_width": 1582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "672": { + "question_id": "672", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 169, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "674": { + "question_id": "674", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, CDE is an equilateral triangle and ABCE is a square with an area of 1. What is the perimeter of polygon ABCDE?\nChoices:\n(A) 4\n(B) 5\n(C) 6\n(D) 7\n(E) 8", + "choices": [ + "4", + "5", + "6", + "7", + "8" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "676": { + "question_id": "676", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "678": { + "question_id": "678", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 21\n(B) 34\n(C) 58\n(D) 67", + "choices": [ + "21", + "34", + "58", + "67" + ], + "answer": "58", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 267, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "680": { + "question_id": "680", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 303, + "img_width": 440, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "682": { + "question_id": "682", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the grass dies then population of squirrel will\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remains the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 592, + "img_width": 864, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "684": { + "question_id": "684", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{CH} \\cong \\overline{KJ}$. Find $x$.\nChoices:\n(A) 27\n(B) 54\n(C) 55\n(D) 83", + "choices": [ + "27", + "54", + "55", + "83" + ], + "answer": "55", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "27", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 444, + "img_width": 608, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "686": { + "question_id": "686", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function invertible?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 442, + "img_width": 731, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "688": { + "question_id": "688", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the minimum age group shown in the \u2018plots\u2019?\nChoices:\n(A) 11-15\n(B) 21-25\n(C) 6-10\n(D) 16-20\n(E) 0-5", + "choices": [ + "11-15", + "21-25", + "6-10", + "16-20", + "0-5" + ], + "answer": "0-5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "11-15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2136, + "img_width": 3160, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "690": { + "question_id": "690", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, lines M and N are parallel. All of the following are true except\nChoices:\n(A) a + b = j + l\n(B) g = h\n(C) c + f = f + b\n(D) g + e + f + h = 360\n(E) d + e = f + j", + "choices": [ + "a + b = j + l", + "g = h", + "c + f = f + b", + "g + e + f + h = 360", + "d + e = f + j" + ], + "answer": "d + e = f + j", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a + b = j + l", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 558, + "img_width": 625, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "692": { + "question_id": "692", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain if grasses dried up in summer, what is likely to happen?\nChoices:\n(A) Grasshoppers will decrease.\n(B) shrews will become extinct\n(C) owls will increase.\n(D) None of the above", + "choices": [ + "Grasshoppers will decrease.", + "shrews will become extinct", + "owls will increase.", + "None of the above" + ], + "answer": "Grasshoppers will decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshoppers will decrease.", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 189, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "694": { + "question_id": "694", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u83f1\u5f62ABCD\u4e2d\uff0cM\u3001N\u5206\u522b\u662fBC\u548cCD\u7684\u4e2d\u70b9\uff0cNP\u22a5AB\u4e8e\u70b9P\uff0c\u8fde\u63a5MP\uff0e\u82e5\u2220DAB\uff1d40\u00b0\uff0c\u5219\u2220MPB\uff1d\uff08\uff09\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 115\u00b0\n(D) 110\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "115\u00b0", + "110\u00b0" + ], + "answer": "110\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 158, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "696": { + "question_id": "696", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Erica has $1,525.00. Does she have enough to buy a motorcycle and a canoe?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 214, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "698": { + "question_id": "698", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle in the figure above, what is the value of x?\nChoices:\n(A) 2*\\sqrt{3}\n(B) 6*\\sqrt{2}\n(C) 6*\\sqrt{3}\n(D) 6\n(E) 12", + "choices": [ + "2*\\sqrt{3}", + "6*\\sqrt{2}", + "6*\\sqrt{3}", + "6", + "12" + ], + "answer": "2*\\sqrt{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2*\\sqrt{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 376, + "img_width": 615, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "700": { + "question_id": "700", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u2299O\u662f\u25b3ABC\u7684\u5916\u63a5\u5706\uff0cAB\uff1dBC\uff1d4\uff0c\u628a\u5f27AB\u6cbf\u5f26AB\u5411\u4e0b\u6298\u53e0\u4ea4BC\u4e8e\u70b9D\uff0c\u82e5\u70b9D\u4e3aBC\u4e2d\u70b9\uff0c\u5219AC\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2\u221a{2}\n(D) \u221a{6}", + "choices": [ + "1", + "2", + "2\u221a{2}", + "\u221a{6}" + ], + "answer": "2\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "702": { + "question_id": "702", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP A\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "400", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "704": { + "question_id": "704", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two puzzle pieces form the larger square?\nChoices:\n(A) 1 & 2\n(B) 1 & 3\n(C) 1 & 4\n(D) 2 & 3\n(E) 2 & 4", + "choices": [ + "1 & 2", + "1 & 3", + "1 & 4", + "2 & 3", + "2 & 4" + ], + "answer": "1 & 3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1 & 2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 440, + "img_width": 396, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "706": { + "question_id": "706", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the image of the dot (8,-2) under a clockwise rotation by 270\u00b0 about the origin.\"\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 432, + "img_width": 438, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "708": { + "question_id": "708", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB \u2225 CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().\nChoices:\n(A) \\frac{6}{5}\n(B) \\frac{7}{6}\n(C) \\frac{9}{5}\n(D) \\frac{15}{2}", + "choices": [ + "\\frac{6}{5}", + "\\frac{7}{6}", + "\\frac{9}{5}", + "\\frac{15}{2}" + ], + "answer": "\\frac{9}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{6}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 156, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "710": { + "question_id": "710", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1555, + "img_width": 2293, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "712": { + "question_id": "712", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 244, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "714": { + "question_id": "714", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large brown rubber motorbikes in front of the big motorbike greater than the number of big green sedans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "716": { + "question_id": "716", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 16 \\sqrt { 2 }\n(B) 16 \\sqrt { 3 }\n(C) 32\n(D) 16 \\sqrt { 5 }", + "choices": [ + "16 \\sqrt { 2 }", + "16 \\sqrt { 3 }", + "32", + "16 \\sqrt { 5 }" + ], + "answer": "16 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16 \\sqrt { 2 }", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 196, + "img_width": 427, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "718": { + "question_id": "718", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Jeffrey is the proud owner of an eclectic bow tie collection. He keeps track of how many bow ties he has, and organizes them by pattern and material. What is the probability that a randomly selected bow tie is designed with swirls and is made of velvet? Simplify any fractions.'", + "choices": null, + "answer": "0.21", + "extraction": "0.33", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 94, + "img_width": 215, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "720": { + "question_id": "720", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function value first reach 2?", + "choices": null, + "answer": "2", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 362, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "722": { + "question_id": "722", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "724": { + "question_id": "724", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rebecca Purple have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 638, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "726": { + "question_id": "726", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 10\n(B) 30\n(C) 90\n(D) 120", + "choices": [ + "10", + "30", + "90", + "120" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 228, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "728": { + "question_id": "728", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 69, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "730": { + "question_id": "730", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year the market share of KLA is highest?", + "choices": null, + "answer": "2019", + "extraction": "2013", + "prediction": "2013", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "732": { + "question_id": "732", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism would be most affected if there was a shortage of plants?\nChoices:\n(A) Grasshopper\n(B) Snake\n(C) Mouse\n(D) Hawk", + "choices": [ + "Grasshopper", + "Snake", + "Mouse", + "Hawk" + ], + "answer": "Grasshopper", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshopper", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "734": { + "question_id": "734", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer double buss that are behind the aeroplane than things on the left side of the yellow double bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "736": { + "question_id": "736", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u76f4\u7ebfa\u2225b\uff0c\u76f4\u89d2\u4e09\u89d2\u5f62ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5\u2220B\uff1d58\u00b0\uff0c\u90a3\u4e48\u22201\ufe63\u22202\uff1d\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 58\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "58\u00b0" + ], + "answer": "32\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 154, + "img_width": 226, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "738": { + "question_id": "738", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 268, + "img_width": 383, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "740": { + "question_id": "740", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What percent of the stands are full?\nChoices:\n(A) 15\n(B) 100\n(C) 50\n(D) 50", + "choices": [ + "15", + "100", + "50", + "50" + ], + "answer": "15", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "742": { + "question_id": "742", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 159, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "744": { + "question_id": "744", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If RL = 5, RT = 9, and WS = 6, find RW.\nChoices:\n(A) 5.4\n(B) 6\n(C) 6.6\n(D) 7.5", + "choices": [ + "5.4", + "6", + "6.6", + "7.5" + ], + "answer": "7.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 404, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "746": { + "question_id": "746", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mrs. Zimmerman hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between 2013 and 2014? (Unit: entries per year)", + "choices": null, + "answer": "7", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 199, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "748": { + "question_id": "748", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangents of \u2299O, the tangent point of point A and B, AC is the diameter of \u2299O, given that \u2220P = 50.0, then the size of \u2220ACB is ()\nChoices:\n(A) 65\u00b0\n(B) 60\u00b0\n(C) 55\u00b0\n(D) 50\u00b0", + "choices": [ + "65\u00b0", + "60\u00b0", + "55\u00b0", + "50\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 207, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "750": { + "question_id": "750", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 356, + "img_width": 290, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "752": { + "question_id": "752", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cPA\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5207\u70b9\u4e3aA\uff0cOP\uff1d4\uff0c\u2220APO\uff1d30\u00b0\uff0c\u5219\u2299O\u7684\u534a\u5f84\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 3", + "choices": [ + "1", + "\u221a{3}", + "2", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 122, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "754": { + "question_id": "754", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. What will happen to fish population if algae's are decreased?\nChoices:\n(A) Population will decrease\n(B) Population will remain the same\n(C) Population will increase\n(D) None of the above", + "choices": [ + "Population will decrease", + "Population will remain the same", + "Population will increase", + "None of the above" + ], + "answer": "Population will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Population will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 364, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "756": { + "question_id": "756", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the trees died, the population of porcupine would most likely\nChoices:\n(A) double\n(B) skyrocket\n(C) decrease\n(D) increase", + "choices": [ + "double", + "skyrocket", + "decrease", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "double", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 591, + "img_width": 765, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "758": { + "question_id": "758", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny purple trucks behind the small matte motorbike less than the number of fighters that are behind the big metal utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "760": { + "question_id": "760", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow tandem bikes less than the number of big objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "762": { + "question_id": "762", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of symmetry of this function?\nChoices:\n(A) (0, 0)\n(B) (-1, 0)\n(C) (2, 0)", + "choices": [ + "(0, 0)", + "(-1, 0)", + "(2, 0)" + ], + "answer": "(0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "764": { + "question_id": "764", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of bananas on each stock?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 349, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "766": { + "question_id": "766", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red trucks than small blue bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "768": { + "question_id": "768", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the hottest on average in Rome?\nChoices:\n(A) December, January, and February\n(B) July and August\n(C) March and April", + "choices": [ + "December, January, and February", + "July and August", + "March and April" + ], + "answer": "July and August", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "December, January, and February", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 448, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "770": { + "question_id": "770", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the amplitude of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "772": { + "question_id": "772", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow shiny motorbikes greater than the number of red rubber fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "774": { + "question_id": "774", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large matte utility bikes than small yellow bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "776": { + "question_id": "776", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $JQ$ if $Q$ is the incenter of $\\triangle JLN$. Rounded to the nearest hundredth.\nChoices:\n(A) 16.50\n(B) 18.79\n(C) 20.32\n(D) 25.50", + "choices": [ + "16.50", + "18.79", + "20.32", + "25.50" + ], + "answer": "18.79", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16.50", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 424, + "img_width": 589, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "778": { + "question_id": "778", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you find the missing shape in this picture puzzle?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "A", + "extraction": "A", + "prediction": "A", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 431, + "img_width": 797, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "780": { + "question_id": "780", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "7", + "extraction": "7", + "prediction": "7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 209, + "img_width": 848, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "782": { + "question_id": "782", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "4", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 376, + "img_width": 384, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "784": { + "question_id": "784", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Across all years, what is the maximum rating of statistical capacity in Maldives ?", + "choices": null, + "answer": "70", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 938, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "786": { + "question_id": "786", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle K$\nChoices:\n(A) 6\n(B) 60\n(C) 100\n(D) 180", + "choices": [ + "6", + "60", + "100", + "180" + ], + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 317, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "788": { + "question_id": "788", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 332, + "img_width": 515, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "790": { + "question_id": "790", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cN\u662fBC\u8fb9\u4e0a\u7684\u4e2d\u70b9\uff0cAM\u5e73\u5206\u2220BAC\uff0cBM\u22a5AM\u4e8e\u70b9M\uff0c\u82e5AB\uff1d8\uff0cMN\uff1d2\uff0e\u5219AC\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 145, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "792": { + "question_id": "792", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2624, + "img_width": 3936, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "794": { + "question_id": "794", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 4?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "796": { + "question_id": "796", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1938, + "img_width": 2516, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "798": { + "question_id": "798", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, l || m. Which of the following must equal 180?\nChoices:\n(A) k + n + r\n(B) k + p + s\n(C) n + p + s\n(D) n + p + t\n(E) r + s + t", + "choices": [ + "k + n + r", + "k + p + s", + "n + p + s", + "n + p + t", + "r + s + t" + ], + "answer": "k + p + s", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "k + n + r", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 372, + "img_width": 371, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "800": { + "question_id": "800", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid intersect Forest Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 596, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "802": { + "question_id": "802", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Karen bought 4 pounds of silk scraps and 4 pounds of canvas scraps. How much did she spend? (Unit: $)", + "choices": null, + "answer": "69", + "extraction": "17", + "prediction": "17", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 194, + "img_width": 243, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "804": { + "question_id": "804", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot B$, $CE=13.5$. Find $BD$. Round to the nearest hundredth.\nChoices:\n(A) 3.71\n(B) 4.29\n(C) 4.53\n(D) 6.75", + "choices": [ + "3.71", + "4.29", + "4.53", + "6.75" + ], + "answer": "4.29", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.71", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 524, + "img_width": 493, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "806": { + "question_id": "806", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and point C is on \u2299O. If \u2220A = 40.0, then the degree of \u2220B is ()\nChoices:\n(A) 80\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 40\u00b0", + "choices": [ + "80\u00b0", + "60\u00b0", + "50\u00b0", + "40\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "808": { + "question_id": "808", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large purple spheres. Subtract all small gray things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "810": { + "question_id": "810", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow metallic balls. Subtract all small yellow shiny things. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "812": { + "question_id": "812", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the gray bar always have smaller value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 1286, + "img_width": 840, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "814": { + "question_id": "814", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "100000000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "816": { + "question_id": "816", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth, if necessary.\nChoices:\n(A) 3\n(B) 9\n(C) 12.25\n(D) 24", + "choices": [ + "3", + "9", + "12.25", + "24" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 272, + "img_width": 379, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "818": { + "question_id": "818", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of least value of light brown graph and leftmost value of dark brown graph?", + "choices": null, + "answer": "0.32", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 434, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "820": { + "question_id": "820", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $a=14, b=48,$ and $c=50$ find $cosA$\nChoices:\n(A) 0.14\n(B) 0.48\n(C) 0.50\n(D) 0.96", + "choices": [ + "0.14", + "0.48", + "0.50", + "0.96" + ], + "answer": "0.96", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "822": { + "question_id": "822", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 22\n(B) 40\n(C) 44\n(D) 48", + "choices": [ + "22", + "40", + "44", + "48" + ], + "answer": "44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "22", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 356, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "824": { + "question_id": "824", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)", + "choices": null, + "answer": "0.13", + "extraction": "0.97", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "826": { + "question_id": "826", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the largest part of the lung?\nChoices:\n(A) Inferior lobes\n(B) Cardiac notch\n(C) Superior lobes\n(D) Middle lobe", + "choices": [ + "Inferior lobes", + "Cardiac notch", + "Superior lobes", + "Middle lobe" + ], + "answer": "Superior lobes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Inferior lobes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 479, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "828": { + "question_id": "828", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Linda wants to buy 0.9 pounds of double chocolate cookie dough. How much will she spend? (Unit: $)", + "choices": null, + "answer": "2.7", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 357, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "830": { + "question_id": "830", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 870, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "832": { + "question_id": "832", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "-2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "834": { + "question_id": "834", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Georgia , does Florida have the lowest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 610, + "img_width": 785, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "836": { + "question_id": "836", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest species shown?\nChoices:\n(A) chinlea\n(B) arganodus\n(C) semionotus\n(D) xenacanthus", + "choices": [ + "chinlea", + "arganodus", + "semionotus", + "xenacanthus" + ], + "answer": "semionotus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "chinlea", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1076, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "838": { + "question_id": "838", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1200, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "840": { + "question_id": "840", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which item can you get the most protein?\nChoices:\n(A) salami\n(B) wine\n(C) cheese\n(D) bread", + "choices": [ + "salami", + "wine", + "cheese", + "bread" + ], + "answer": "salami", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "salami", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 375, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "842": { + "question_id": "842", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $\\frac{2.0}{3.0}$hours to reach point B, then tan\u2220BAP = ()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{6}{5}\n(C) \\frac{\u221a{5}}{5}\n(D) \\frac{2\u221a{5}}{5}", + "choices": [ + "\\frac{4}{5}", + "\\frac{6}{5}", + "\\frac{\u221a{5}}{5}", + "\\frac{2\u221a{5}}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{5}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 115, + "img_width": 154, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "844": { + "question_id": "844", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the larger window shaped like the smaller window?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "846": { + "question_id": "846", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 758, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "848": { + "question_id": "848", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the tuberculosis treatment success rate in Bulgaria greater than the average tuberculosis treatment success rate in Bulgaria taken over all years ?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1091, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "850": { + "question_id": "850", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of cars in front of the tiny metal thing less than the number of large matte things in front of the cyan rubber road bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "852": { + "question_id": "852", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "40", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 598, + "img_width": 612, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "854": { + "question_id": "854", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the pelicans in the community were eradicated, which population feel the most direct effect?\nChoices:\n(A) Plant\n(B) Phyto-plankton\n(C) Fish\n(D) Lizard", + "choices": [ + "Plant", + "Phyto-plankton", + "Fish", + "Lizard" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "856": { + "question_id": "856", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture has the least leaves?\nChoices:\n(A) Both\n(B) Compound\n(C) Simple\n(D) Neither", + "choices": [ + "Both", + "Compound", + "Simple", + "Neither" + ], + "answer": "Simple", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Both", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "858": { + "question_id": "858", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the basis of the given food web, which organism will increase in number if there were no seals?\nChoices:\n(A) Shark\n(B) Small Shrimp\n(C) Octopus\n(D) Mysid Shrimp", + "choices": [ + "Shark", + "Small Shrimp", + "Octopus", + "Mysid Shrimp" + ], + "answer": "Octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Shark", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "860": { + "question_id": "860", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Miss Foley ran a sit-up competition among her P.E. students and monitored how many sit-ups each students could do. What is the largest number of sit-ups done? (Unit: sit-ups)", + "choices": null, + "answer": "86", + "extraction": "256", + "prediction": "256", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 246, + "img_width": 291, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "862": { + "question_id": "862", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: One of the most dramatic videos on the web (but entirely fictitious) supposedly shows a man sliding along a long water slide and then being launched into the air to land in a water pool. Let's attach some reasonable numbers to such a flight to calculate the velocity with which the man would have hit the water. Figure indicates the launch and landing sites and includes a superimposed coordinate system with its origin conveniently located at the launch site. From the video we take the horizontal flight distance as $D=20.0 \\mathrm{~m}$, the flight time as $t=2.50 \\mathrm{~s}$, and the launch angle as $\\theta_0=40.0^{\\circ}$. Find the magnitude of the velocity at launch and at landing.", + "choices": null, + "answer": "10.44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 600, + "img_width": 1302, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "864": { + "question_id": "864", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1738, + "img_width": 2480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "866": { + "question_id": "866", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For trapezoid $Q R S T, A$ and $B$ are midpoints of the legs. Find $m \\angle S$\nChoices:\n(A) 45\n(B) 60\n(C) 120\n(D) 135", + "choices": [ + "45", + "60", + "120", + "135" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 169, + "img_width": 359, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "868": { + "question_id": "868", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green cylinders. Subtract all rubber cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "870": { + "question_id": "870", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny motorbikes in front of the small cyan tandem bike than big cyan metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "872": { + "question_id": "872", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the next shape.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "D", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 496, + "img_width": 1472, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "874": { + "question_id": "874", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of y at x=-2.5?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "876": { + "question_id": "876", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, square $ABDC$ is inscribed in $\\odot K$. Find the measure of a central angle.\nChoices:\n(A) 45\n(B) 60\n(C) 90\n(D) 180", + "choices": [ + "45", + "60", + "90", + "180" + ], + "answer": "90", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 275, + "img_width": 273, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "878": { + "question_id": "878", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u9762\u79ef\u5206\u522b\u4e3aS1\uff0cS2\uff0cS3\uff0c\u4e14S1\uff1d5\uff0cS3\uff1d16\uff0c\u5219S2\uff1d\uff08\uff09\nChoices:\n(A) 6\n(B) 2\u221a{2}\n(C) 11\n(D) 24", + "choices": [ + "6", + "2\u221a{2}", + "11", + "24" + ], + "answer": "11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 94, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "880": { + "question_id": "880", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the total add up value of largest and smallest bar?", + "choices": null, + "answer": "252.65", + "extraction": "5.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "882": { + "question_id": "882", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "884": { + "question_id": "884", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue kite in the lower right corner shaped like?\nChoices:\n(A) ferret\n(B) cat\n(C) cloud\n(D) octopus", + "choices": [ + "ferret", + "cat", + "cloud", + "octopus" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ferret", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "886": { + "question_id": "886", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A newspaper researched how many grocery stores there are in each town. What is the median of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "888": { + "question_id": "888", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green shiny balls. Subtract all small metallic things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "890": { + "question_id": "890", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is larger the moon or the sun?\nChoices:\n(A) Sun\n(B) It varies\n(C) They are equal in size\n(D) Moon", + "choices": [ + "Sun", + "It varies", + "They are equal in size", + "Moon" + ], + "answer": "Sun", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Sun", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 844, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "892": { + "question_id": "892", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does New Jersey have a higher value than Georgia ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "894": { + "question_id": "894", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms fat and acre?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "896": { + "question_id": "896", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately, what percentage of jewelry sales in January were Rings?\nChoices:\n(A) Around 21%\n(B) Around 27%\n(C) Around 31%\n(D) Around 37%", + "choices": [ + "Around 21%", + "Around 27%", + "Around 31%", + "Around 37%" + ], + "answer": "Around 31%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Around 21%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "elementary school", + "img_height": 464, + "img_width": 758, + "language": "english", + "skills": [ + "logical reasoning", + "statistical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "898": { + "question_id": "898", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, and C are the three points on \u2299O, if \u2220C = 35.0, then the degree of \u2220OAB is ()\nChoices:\n(A) 35\u00b0\n(B) 55\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "35\u00b0", + "55\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 109, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "900": { + "question_id": "900", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rubber cars less than the number of brown jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "902": { + "question_id": "902", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the leaf base has an angle greater than 90 degrees, what is it called?\nChoices:\n(A) obtuse\n(B) decurrent\n(C) cuneate\n(D) acute", + "choices": [ + "obtuse", + "decurrent", + "cuneate", + "acute" + ], + "answer": "obtuse", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "obtuse", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1429, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "904": { + "question_id": "904", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "906": { + "question_id": "906", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two value is greater then then largest value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "908": { + "question_id": "908", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which organism would most likely have a decrease in its population if decrease the population of ant base of above diagram?\nChoices:\n(A) plant\n(B) human\n(C) lizard\n(D) snake", + "choices": [ + "plant", + "human", + "lizard", + "snake" + ], + "answer": "lizard", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 497, + "img_width": 312, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "910": { + "question_id": "910", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal balls. Subtract all large matte things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "912": { + "question_id": "912", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 413, + "img_width": 629, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "914": { + "question_id": "914", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny purple shiny cubes. Subtract all large purple balls. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "916": { + "question_id": "916", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, \u2220A = 30.0, BC = 2.0, the radius of \u2299C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of \u2299C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()\nChoices:\n(A) 2\n(B) \u221a{3}\n(C) \u221a{2}\n(D) 2-\\frac{\u221a{3}}{3}", + "choices": [ + "2", + "\u221a{3}", + "\u221a{2}", + "2-\\frac{\u221a{3}}{3}" + ], + "answer": "\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 145, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "918": { + "question_id": "918", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "1", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 492, + "img_width": 538, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "920": { + "question_id": "920", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The measure of angle BAC equals x*\\degree. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 388, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "922": { + "question_id": "922", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual element in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "924": { + "question_id": "924", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Periwinkle have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "926": { + "question_id": "926", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the shaded area under the curve? Round the answer to 2 decimal places", + "choices": null, + "answer": "7.07", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "928": { + "question_id": "928", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more does a navy blue bath mat cost than a yellow bath towel? (Unit: $)", + "choices": null, + "answer": "5", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 234, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "930": { + "question_id": "930", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cF\u662f\u25b3ABC\u7684\u89d2\u5e73\u5206\u7ebfCD\u548cBE\u7684\u4ea4\u70b9\uff0cCG\u22a5AB\u4e8e\u70b9G\uff0e\u82e5\u2220ACG\uff1d32\u00b0\uff0c\u5219\u2220BFC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 119\u00b0\n(B) 122\u00b0\n(C) 148\u00b0\n(D) 150\u00b0", + "choices": [ + "119\u00b0", + "122\u00b0", + "148\u00b0", + "150\u00b0" + ], + "answer": "119\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "119\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 113, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "932": { + "question_id": "932", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the phytoplankton if krill increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't be predicted\n(D) stay the same", + "choices": [ + "decrease", + "increase", + "can't be predicted", + "stay the same" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 350, + "img_width": 750, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "934": { + "question_id": "934", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "10000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "936": { + "question_id": "936", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 892, + "img_width": 710, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "938": { + "question_id": "938", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m \u22209 = 75$. Find the measure of $\\angle 6$.\nChoices:\n(A) 75\n(B) 85\n(C) 95\n(D) 105", + "choices": [ + "75", + "85", + "95", + "105" + ], + "answer": "105", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 278, + "img_width": 417, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "940": { + "question_id": "940", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red things. Subtract all metallic things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "942": { + "question_id": "942", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "0", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "944": { + "question_id": "944", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "946": { + "question_id": "946", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 373, + "img_width": 560, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "948": { + "question_id": "948", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some students compared how many blocks they live from school. What is the mean of the numbers?'", + "choices": null, + "answer": "11", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 207, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "950": { + "question_id": "950", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The slope of f(x) at x=0 is ____\nChoices:\n(A) positive\n(B) negative\n(C) zero\n(D) undefined", + "choices": [ + "positive", + "negative", + "zero", + "undefined" + ], + "answer": "positive", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "positive", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "952": { + "question_id": "952", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the food web below and on your knowledge of biology. A decrease in the Aquatic crustaceans population will most immediately decrease the available energy for the\nChoices:\n(A) Minnows\n(B) Ducks\n(C) Fish\n(D) Raccoons", + "choices": [ + "Minnows", + "Ducks", + "Fish", + "Raccoons" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Minnows", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "954": { + "question_id": "954", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A partial food web is shown below. Which of the following will most likely happen if the snake population decreases?\nChoices:\n(A) Cricket will increase\n(B) Mouse will increase\n(C) Rabbit will increase\n(D) All of above", + "choices": [ + "Cricket will increase", + "Mouse will increase", + "Rabbit will increase", + "All of above" + ], + "answer": "All of above", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Cricket will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 277, + "img_width": 475, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "956": { + "question_id": "956", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small blue rubber objects. Subtract all brown shiny balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "958": { + "question_id": "958", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the missing letters from below to form a word, using all letters presented\nChoices:\n(A) A, R, N\n(B) R, D, N\n(C) I, A, M\n(D) H, O, W", + "choices": [ + "A, R, N", + "R, D, N", + "I, A, M", + "H, O, W" + ], + "answer": "R, D, N", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "A, R, N", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 773, + "img_width": 945, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "960": { + "question_id": "960", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1365, + "img_width": 2048, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "962": { + "question_id": "962", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of y at x=10 is ____ that at x=70.\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 301, + "img_width": 387, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "964": { + "question_id": "964", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 70, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "966": { + "question_id": "966", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 166, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "968": { + "question_id": "968", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue balls. Subtract all big yellow rubber balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "970": { + "question_id": "970", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e24\u76f4\u7ebfa\uff0cb\u88ab\u76f4\u7ebfc\u6240\u622a\uff0c\u5df2\u77e5a\u2225b\uff0c\u22201\uff1d62\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 62\u00b0\n(B) 108\u00b0\n(C) 118\u00b0\n(D) 128\u00b0", + "choices": [ + "62\u00b0", + "108\u00b0", + "118\u00b0", + "128\u00b0" + ], + "answer": "118\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "62\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 135, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "972": { + "question_id": "972", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow shiny utility bikes greater than the number of brown metallic cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "974": { + "question_id": "974", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there the same number of big blue trucks and large purple metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "976": { + "question_id": "976", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal biplanes behind the purple shiny object less than the number of purple school buss behind the big red object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "978": { + "question_id": "978", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Allie kept a written log of how many miles she biked during the past 7 days. What is the range of the numbers?'", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "980": { + "question_id": "980", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number shown?", + "choices": null, + "answer": "12", + "extraction": "12", + "prediction": "12", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 429, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "982": { + "question_id": "982", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Wyoming , does South Dakota have the highest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "984": { + "question_id": "984", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray cars less than the number of small metallic minivans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "986": { + "question_id": "986", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAD\u662f\u89d2\u5e73\u5206\u7ebf\uff0cAE\u662f\u9ad8\uff0e\u82e5\u2220B\uff1d40\u00b0\uff0c\u2220C\uff1d70\u00b0\uff0c\u5219\u2220EAD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 10\u00b0\n(B) 15\u00b0\n(C) 17.5\u00b0\n(D) 20\u00b0", + "choices": [ + "10\u00b0", + "15\u00b0", + "17.5\u00b0", + "20\u00b0" + ], + "answer": "15\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 101, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "988": { + "question_id": "988", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "990": { + "question_id": "990", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot S$, $m \\widehat {PQR}=98$, Find $m \\widehat {PQ}$.\nChoices:\n(A) 45\n(B) 49\n(C) 90\n(D) 98", + "choices": [ + "45", + "49", + "90", + "98" + ], + "answer": "49", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 452, + "img_width": 544, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "992": { + "question_id": "992", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of purple metallic things that are behind the small green motorbike less than the number of blue metal articulated buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "994": { + "question_id": "994", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta greater than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 548, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "996": { + "question_id": "996", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big shiny balls. Subtract all blue rubber blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "998": { + "question_id": "998", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff1a\u2220AOB\uff1a\u2220BOC\uff1a\u2220COD\uff1d2\uff1a3\uff1a4\uff0c\u5c04\u7ebfOM\u3001ON\uff0c\u5206\u522b\u5e73\u5206\u2220AOB\u4e0e\u2220COD\uff0c\u53c8\u2220MON\uff1d84\u00b0\uff0c\u5219\u2220AOB\u4e3a\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 38\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "38\u00b0" + ], + "answer": "28\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 181, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "1000": { + "question_id": "1000", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte cylinders. Subtract all big purple matte things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + } +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json new file mode 100644 index 0000000000000000000000000000000000000000..40896c215af4c442d88adacb9e68bec3586a8b9e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843ba4f9777b2c69442aa0ea4e48e2845ed19bb9ed320b49c8647cc3da343c28 +size 45272419 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json new file mode 100644 index 0000000000000000000000000000000000000000..7327f751e5bb21e8a2f85696035823e4afa60294 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5033ba470671eeca7bf50fb890fbf8716c3cb6e2ac150839a8111b765e658d +size 94631595 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..25c1a9c01f3c09441925499bbde31fc66d3aba00 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bd208990c79ee8ec97895fccc9d91a4b18e85291f2892d986c4839972eac86 +size 36750492 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json new file mode 100644 index 0000000000000000000000000000000000000000..53f748a37fbe6bedd019a3b79ea4bf81e79ee73a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4829e00e1150da10de6554ddf43d0512f21f6ce92e6db3d896e4ca0cab1e669 +size 60427313 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/results.json new file mode 100644 index 0000000000000000000000000000000000000000..023f513f69d8fd7b0567efe306b244300c6936d3 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/results.json @@ -0,0 +1,285 @@ +{ + "results": { + "mathvista_testmini": { + "gpt_eval_score,none": 23.7, + "gpt_eval_score_stderr,none": "N/A", + "alias": "mathvista_testmini" + }, + "mme": { + "mme_cognition_score,none": 321.7857142857143, + "mme_cognition_score_stderr,none": "N/A", + "mme_percetion_score,none": 1418.2278911564626, + "mme_percetion_score_stderr,none": "N/A", + "alias": "mme" + }, + "mmmu_val": { + "mmmu_acc,none": 0.41222, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + }, + "mmstar": { + "coarse perception,none": 0.6918706627011363, + "coarse perception_stderr,none": "N/A", + "fine-grained perception,none": 0.3625644804716286, + "fine-grained perception_stderr,none": "N/A", + "instance reasoning,none": 0.5205089434882838, + "instance reasoning_stderr,none": "N/A", + "logical reasoning,none": 0.3660535284297661, + "logical reasoning_stderr,none": "N/A", + "math,none": 0.28080727078321305, + "math_stderr,none": "N/A", + "science & technology,none": 0.19842818316868963, + "science & technology_stderr,none": "N/A", + "alias": "mmstar" + } + }, + "configs": { + "mathvista_testmini": { + "task": "mathvista_testmini", + "dataset_path": "AI4Math/MathVista", + "dataset_kwargs": { + "token": true + }, + "test_split": "testmini", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "gpt_eval_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "ASSISTANT:" + ], + "max_new_tokens": 1024, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "model_specific_prompt_kwargs": { + "default": { + "shot_type": "format-prompt", + "shot": 0, + "use_caption": false, + "use_ocr": false + }, + "phi3v": { + "shot_type": "solution" + } + }, + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mme": { + "task": "mme", + "dataset_path": "lmms-lab/MME", + "dataset_kwargs": { + "token": false + }, + "test_split": "test", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mme_percetion_score", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "mme_cognition_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 16, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "until": [ + "\n\n" + ] + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question using a single word or phrase." + }, + "gpt4v": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question with Yes or No." + }, + "qwen_vl": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "otterhd": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "xcomposer2_4khd": { + "pre_prompt": "[UNUSED_TOKEN_146]user\n", + "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n" + } + } + }, + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mmstar": { + "task": "mmstar", + "dataset_path": "Lin-Chen/MMStar", + "dataset_kwargs": { + "token": true + }, + "test_split": "val", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "coarse perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "fine-grained perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "instance reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "logical reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "science & technology", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "math", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "\n\n" + ], + "do_sample": false + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer with the option's letter from the given choices directly" + } + } + } + }, + "versions": { + "mathvista_testmini": "Yaml", + "mme": "Yaml", + "mmmu_val": "Yaml", + "mmstar": "Yaml" + }, + "n-shot": { + "mathvista_testmini": 0, + "mme": 0, + "mmmu_val": 0, + "mmstar": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-12477,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/submissions/mathvista_testmini_scores.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/submissions/mathvista_testmini_scores.json new file mode 100644 index 0000000000000000000000000000000000000000..b66f5e9f22fe7cf78a7a2b7993b1e7077ab05668 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/submissions/mathvista_testmini_scores.json @@ -0,0 +1,26873 @@ +{ + "1": { + "question_id": "1", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", + "choices": null, + "answer": "1.2", + "extraction": "0.25", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 720, + "img_width": 1514, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "3": { + "question_id": "3", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u25b3ABC\u7684\u4e24\u5185\u89d2\u5e73\u5206\u7ebfOB\u3001OC\u76f8\u4ea4\u4e8e\u70b9O\uff0c\u82e5\u2220A\uff1d110\u00b0\uff0c\u5219\u2220BOC\uff1d\uff08\uff09\nChoices:\n(A) 135\u00b0\n(B) 140\u00b0\n(C) 145\u00b0\n(D) 150\u00b0", + "choices": [ + "135\u00b0", + "140\u00b0", + "145\u00b0", + "150\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "135\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 60, + "img_width": 131, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "5": { + "question_id": "5", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m\\angle H$\nChoices:\n(A) 97\n(B) 102\n(C) 107\n(D) 122", + "choices": [ + "97", + "102", + "107", + "122" + ], + "answer": "97", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "97", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 245, + "img_width": 322, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "7": { + "question_id": "7", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "9": { + "question_id": "9", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u662f\u4e00\u682a\u7f8e\u4e3d\u7684\u52fe\u80a1\u6811\uff0c\u5176\u4e2d\u6240\u6709\u56db\u8fb9\u5f62\u90fd\u662f\u6b63\u65b9\u5f62\uff0c\u6240\u6709\u7684\u4e09\u89d2\u5f62\u90fd\u662f\u76f4\u89d2\u4e09\u89d2\u5f62\uff0c\u82e5\u6b63\u65b9\u5f62A\u3001B\u7684\u9762\u79ef\u5206\u522b\u4e3a5\u30013\uff0c\u5219\u6700\u5927\u6b63\u65b9\u5f62C\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 15\n(B) 13\n(C) 11\n(D) 8", + "choices": [ + "15", + "13", + "11", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 155, + "img_width": 134, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "11": { + "question_id": "11", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all tiny matte balls. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "13": { + "question_id": "13", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 90 percent of people in at least one category?", + "choices": null, + "answer": "0", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "15": { + "question_id": "15", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism with be most affected if algae was eliminated?\nChoices:\n(A) Tilapia\n(B) Common water flea\n(C) Great diving beetle\n(D) Tadpole", + "choices": [ + "Tilapia", + "Common water flea", + "Great diving beetle", + "Tadpole" + ], + "answer": "Common water flea", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Tilapia", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 232, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "17": { + "question_id": "17", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0cD\u662fAB\u7684\u4e2d\u70b9\uff0cAB\uff1d10\uff0c\u5219CD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10", + "choices": [ + "5", + "6", + "8", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 172, + "img_width": 125, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "19": { + "question_id": "19", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest amount this class measures?", + "choices": null, + "answer": "400", + "extraction": "400", + "prediction": "400", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 684, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "21": { + "question_id": "21", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "23": { + "question_id": "23", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=2 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "25": { + "question_id": "25", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the smoothest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 770, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "27": { + "question_id": "27", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "11", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1752, + "img_width": 2628, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "29": { + "question_id": "29", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 440, + "img_width": 670, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "31": { + "question_id": "31", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more big red rubber double buss in front of the large red double bus than big green things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "33": { + "question_id": "33", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()\nChoices:\n(A) 2\u03c0cm\n(B) 3\u03c0cm\n(C) 4\u03c0cm\n(D) 5\u03c0cm", + "choices": [ + "2\u03c0cm", + "3\u03c0cm", + "4\u03c0cm", + "5\u03c0cm" + ], + "answer": "4\u03c0cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2\u03c0cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 331, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "35": { + "question_id": "35", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cEF\uff0cEB\u662f\u2299O\u7684\u5f26\uff0c\u70b9E\u662fFEB\u7684\u4e2d\u70b9\uff0cEF\u4e0eAB\u4ea4\u4e8e\u70b9C\uff0c\u8fde\u63a5OF\uff0c\u82e5\u2220AOF\uff1d40\u00b0\uff0c\u5219\u2220F\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 55\u00b0", + "choices": [ + "20\u00b0", + "35\u00b0", + "40\u00b0", + "55\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "37": { + "question_id": "37", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit as x approaches -1?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 410, + "img_width": 408, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "39": { + "question_id": "39", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function odd or even?\nChoices:\n(A) odd\n(B) even", + "choices": [ + "odd", + "even" + ], + "answer": "odd", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "odd", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 304, + "img_width": 433, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "41": { + "question_id": "41", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 3491, + "img_width": 5236, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "43": { + "question_id": "43", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the wettest on average in Christchurch?\nChoices:\n(A) August\n(B) April\n(C) May", + "choices": [ + "August", + "April", + "May" + ], + "answer": "May", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "August", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 449, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "45": { + "question_id": "45", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An administrator at the Department of Motor Vehicles (DMV) tracked the average wait time from month to month. According to the table, what was the rate of change between August and September? (Unit: minutes per month)", + "choices": null, + "answer": "-3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "47": { + "question_id": "47", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all rubber balls. Subtract all yellow shiny things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "49": { + "question_id": "49", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the digits on either end of the sign in the corner?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 476, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "51": { + "question_id": "51", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber objects in front of the small yellow aeroplane greater than the number of big cyan matte fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "53": { + "question_id": "53", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 593, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "55": { + "question_id": "55", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e00\u5757\u76f4\u89d2\u4e09\u89d2\u677f60\u00b0\u7684\u89d2\u7684\u9876\u70b9A\u4e0e\u76f4\u89d2\u9876\u70b9C\u5206\u522b\u5728\u4e24\u5e73\u884c\u7ebfFG\uff0cDE\u4e0a\uff0c\u659c\u8fb9AB\u5e73\u5206\u2220CAG\uff0c\u4ea4\u76f4\u7ebfDE\u4e8e\u70b9H\uff0c\u5219\u2220BCH\u7684\u5927\u5c0f\u4e3a\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 45\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "60\u00b0", + "45\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "30\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 175, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "57": { + "question_id": "57", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small balls. Subtract all blue rubber things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "59": { + "question_id": "59", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, CD is the chord of \u2299O, \u2220ADC = 26.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 26\u00b0\n(B) 74\u00b0\n(C) 64\u00b0\n(D) 54\u00b0", + "choices": [ + "26\u00b0", + "74\u00b0", + "64\u00b0", + "54\u00b0" + ], + "answer": "64\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "61": { + "question_id": "61", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 427, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "63": { + "question_id": "63", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red matte cubes. Subtract all small green metal objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "65": { + "question_id": "65", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is f(3) > 0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "67": { + "question_id": "67", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the square?", + "choices": null, + "answer": "16", + "extraction": "16", + "prediction": "16", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "69": { + "question_id": "69", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big matte balls. Subtract all green rubber objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "71": { + "question_id": "71", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "18", + "extraction": "18", + "prediction": "18", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "73": { + "question_id": "73", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the matrix.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 654, + "img_width": 387, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "75": { + "question_id": "75", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "77": { + "question_id": "77", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year showed the largest difference in the data points between the two lines", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "79": { + "question_id": "79", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If \u2220ABC = 60.0, then the degree of \u2220CDE is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 104, + "img_width": 123, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "81": { + "question_id": "81", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of r at theta=3*pi/2?", + "choices": null, + "answer": "-1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 460, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "83": { + "question_id": "83", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shiny buss less than the number of matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "85": { + "question_id": "85", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many countries have people working for more than 35 hours over the years?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "87": { + "question_id": "87", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $790, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "89": { + "question_id": "89", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 384, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "91": { + "question_id": "91", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown suvs less than the number of brown rubber school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "93": { + "question_id": "93", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What's the computing and wirless total for semiconductor demand in 2014?", + "choices": null, + "answer": "197.3", + "extraction": "10.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "95": { + "question_id": "95", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects \u2220AOE, \u2220BOC = 50.0, then \u2220EOB = ()\nChoices:\n(A) 50\u00b0\n(B) 60\u00b0\n(C) 70\u00b0\n(D) 80\u00b0", + "choices": [ + "50\u00b0", + "60\u00b0", + "70\u00b0", + "80\u00b0" + ], + "answer": "80\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 162, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "97": { + "question_id": "97", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 9?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "99": { + "question_id": "99", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cat is larger?\nChoices:\n(A) white five\n(B) white three\n(C) white four\n(D) white one\n(E) white two", + "choices": [ + "white five", + "white three", + "white four", + "white one", + "white two" + ], + "answer": "white one", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "white five", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "101": { + "question_id": "101", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which shape is most erect?\nChoices:\n(A) Lanceolate\n(B) Heart-shaped\n(C) Linear\n(D) Spatulate", + "choices": [ + "Lanceolate", + "Heart-shaped", + "Linear", + "Spatulate" + ], + "answer": "Linear", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lanceolate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1204, + "img_width": 376, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "103": { + "question_id": "103", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple matte blocks. Subtract all blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "105": { + "question_id": "105", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 727, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "107": { + "question_id": "107", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "109": { + "question_id": "109", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny balls. Subtract all green metallic things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "111": { + "question_id": "111", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big gray matte things. Subtract all small metallic cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "113": { + "question_id": "113", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many baseballs are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 458, + "img_width": 721, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "115": { + "question_id": "115", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1079, + "img_width": 826, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "117": { + "question_id": "117", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the range of this function?\nChoices:\n(A) [0, 2]\n(B) [3, 2]\n(C) [2, 4]\n(D) [-3, 4]", + "choices": [ + "[0, 2]", + "[3, 2]", + "[2, 4]", + "[-3, 4]" + ], + "answer": "[0, 2]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "[0, 2]", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 460, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "119": { + "question_id": "119", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, P is a point outside \u2299O, PA and PB intersect \u2299O at two points C and D respectively. It is known that the central angles of \u2040AB and \u2040CD are 90.0 and 50.0 respectively, then \u2220P = ()\nChoices:\n(A) 45\u00b0\n(B) 40\u00b0\n(C) 25\u00b0\n(D) 20\u00b0", + "choices": [ + "45\u00b0", + "40\u00b0", + "25\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 103, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "121": { + "question_id": "121", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In trying to calculate how much money could be saved by packing lunch, Manny recorded the amount he spent on lunch each day. According to the table, what was the rate of change between Wednesday and Thursday? (Unit: $, per day)", + "choices": null, + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "123": { + "question_id": "123", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram represents successive rotations, starting from the top down. Which shape comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 579, + "img_width": 412, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "125": { + "question_id": "125", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if caterpillars decrease?\nChoices:\n(A) plants decrease\n(B) plants increase\n(C) nothing happens\n(D) none of the above", + "choices": [ + "plants decrease", + "plants increase", + "nothing happens", + "none of the above" + ], + "answer": "plants increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plants decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "127": { + "question_id": "127", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "129": { + "question_id": "129", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "131": { + "question_id": "131", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have value below 40?", + "choices": null, + "answer": "3", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "133": { + "question_id": "133", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the merchandise exports greater than 0.92 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1268, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "135": { + "question_id": "135", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of buss that are in front of the big yellow aeroplane less than the number of matte bicycles that are on the right side of the tiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "137": { + "question_id": "137", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) injective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 258, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "139": { + "question_id": "139", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Indigo have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 543, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "141": { + "question_id": "141", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()\nChoices:\n(A) 3.85\u7c73\n(B) 4.00\u7c73\n(C) 4.40\u7c73\n(D) 4.50\u7c73", + "choices": [ + "3.85\u7c73", + "4.00\u7c73", + "4.40\u7c73", + "4.50\u7c73" + ], + "answer": "4.40\u7c73", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.85\u7c73", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 78, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "143": { + "question_id": "143", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, CE bisects \u2220BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 6", + "choices": [ + "1", + "2", + "3", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 204, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "145": { + "question_id": "145", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Can you find the missing term?", + "choices": null, + "answer": "10", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "147": { + "question_id": "147", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample B\n(C) sample A", + "choices": [ + "neither; the samples have the same temperature", + "sample B", + "sample A" + ], + "answer": "sample B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 563, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "149": { + "question_id": "149", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u22201\uff1d50\u00b0\uff0c\u22202\uff1d75\u00b0\uff0c\u5219\u22203\uff1d\uff08\uff09\nChoices:\n(A) 55\u00b0\n(B) 60\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "55\u00b0", + "60\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 93, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "151": { + "question_id": "151", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the function reach its local maximum?\nChoices:\n(A) (u1, u2) = (0, 0)\n(B) (u1, u2) = (1, 0)\n(C) (u1, u2) = (0, 1)\n(D) (u1, u2) = (1, 1)", + "choices": [ + "(u1, u2) = (0, 0)", + "(u1, u2) = (1, 0)", + "(u1, u2) = (0, 1)", + "(u1, u2) = (1, 1)" + ], + "answer": "(u1, u2) = (0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(u1, u2) = (0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 458, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "153": { + "question_id": "153", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be impacted by an increase in owls?\nChoices:\n(A) sun\n(B) grasshoppers\n(C) grass\n(D) mice", + "choices": [ + "sun", + "grasshoppers", + "grass", + "mice" + ], + "answer": "mice", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "sun", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "155": { + "question_id": "155", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Green have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 601, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "157": { + "question_id": "157", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9335", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "159": { + "question_id": "159", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "100", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1000, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "161": { + "question_id": "161", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the two numbers visible in the picture?", + "choices": null, + "answer": "71", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "163": { + "question_id": "163", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "7519", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "165": { + "question_id": "165", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan rubber cylinders. Subtract all tiny shiny cubes. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "167": { + "question_id": "167", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the biggest zero of this function?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "169": { + "question_id": "169", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1049, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "171": { + "question_id": "171", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many cinnamon rolls are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 190, + "img_width": 467, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "173": { + "question_id": "173", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small rubber buss behind the big green road bike less than the number of suvs that are behind the large brown matte truck?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "175": { + "question_id": "175", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm liver for all the datasets?", + "choices": null, + "answer": "24", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "177": { + "question_id": "177", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown tandem bikes that are to the left of the small blue matte car greater than the number of tiny blue biplanes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "179": { + "question_id": "179", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u5df2\u77e5AC\uff1d4cm\uff0c\u82e5\u25b3ACD\u7684\u5468\u957f\u4e3a14cm\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 14cm\n(B) 28cm\n(C) 10cm\n(D) 20cm", + "choices": [ + "14cm", + "28cm", + "10cm", + "20cm" + ], + "answer": "20cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 157, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "181": { + "question_id": "181", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is correct?\nChoices:\n(A) A\n(B) B\n(C) C", + "choices": [ + "A", + "B", + "C" + ], + "answer": "C", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 332, + "img_width": 864, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "183": { + "question_id": "183", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown cubes. Subtract all gray cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "185": { + "question_id": "185", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An image has the gray level PDF $p_r(r)$ shown in Fig. Q1a. One wants to do histogram specification SO that the processed image will have the specified $p_z(z)$ shown in Fig. Q1b. Can we use intensity mapping function $T: z=1-r$ to achieve the goal?\nChoices:\n(A) True\n(B) False", + "choices": [ + "True", + "False" + ], + "answer": "False", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "True", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 376, + "img_width": 724, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "187": { + "question_id": "187", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9015", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "189": { + "question_id": "189", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "191": { + "question_id": "191", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the volume of the air carriers in Ethiopia greater than the average volume of the air carriers in Ethiopia taken over all years ?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1116, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "193": { + "question_id": "193", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "195": { + "question_id": "195", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cC\uff0cD\u4e24\u70b9\u5728\u2299O\u4e0a\uff0c\u2220BCD\uff1d25\u00b0\uff0c\u5219\u2220AOD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 120\u00b0\n(B) 125\u00b0\n(C) 130\u00b0\n(D) 135\u00b0", + "choices": [ + "120\u00b0", + "125\u00b0", + "130\u00b0", + "135\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "120\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "197": { + "question_id": "197", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many sequences have negative Influence Scores?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 772, + "img_width": 1766, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "199": { + "question_id": "199", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure 23-42 is a section of a conducting rod of radius $R_1=1.30 \\mathrm{~mm}$ and length $L=$ $11.00 \\mathrm{~m}$ inside a thin-walled coaxial conducting cylindrical shell of radius $R_2=10.0 R_1$ and the (same) length $L$. The net charge on the rod is $Q_1=+3.40 \\times 10^{-12} \\mathrm{C}$; that on the shell is $Q_2=-2.00 Q_1$. What is the magnitude $E$ of the electric field at radial distance $r=2.00 R_2$?", + "choices": null, + "answer": "0.21", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 303, + "img_width": 262, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "201": { + "question_id": "201", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the border group?", + "choices": null, + "answer": "19", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "203": { + "question_id": "203", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57285\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u90fd\u662f1\uff0c\u25b3ABC\u7684\u9876\u70b9\u90fd\u5728\u8fd9\u4e9b\u5c0f\u6b63\u65b9\u5f62\u7684\u9876\u70b9\u4e0a\uff0c\u5219tan\u2220BAC\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8", + "choices": [ + "\\frac{4}{3}", + "0.75", + "0.6", + "0.8" + ], + "answer": "\\frac{4}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 151, + "img_width": 172, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "205": { + "question_id": "205", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A statistician analyzed the number of runs scored by players last season. How many players scored more than 2 runs last season?'", + "choices": null, + "answer": "24", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "207": { + "question_id": "207", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms magic and secure?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "209": { + "question_id": "209", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the highest value in black line chart ?", + "choices": null, + "answer": "28.3", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "211": { + "question_id": "211", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 2?", + "choices": null, + "answer": "6", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "213": { + "question_id": "213", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year there was lowest per capita real gross domestic product of ohio?", + "choices": null, + "answer": "2001", + "extraction": "2009", + "prediction": "2009", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "215": { + "question_id": "215", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Layla went on a camping trip and logged the number of miles she hiked each day. What is the range of the numbers?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 249, + "img_width": 212, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "217": { + "question_id": "217", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 202, + "img_width": 304, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "219": { + "question_id": "219", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "221": { + "question_id": "221", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, C are three points on \u2299O, \u2220ACB = 25.0, then the degree of \u2220BAO is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "223": { + "question_id": "223", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an even function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "225": { + "question_id": "225", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Fig. Q4 shows the contour of an object. Represent it with an 8-directional chain code. The resultant chain code should be normalized with respect to the starting point of the chain code. Represent the answer as a list with each digit as a element.", + "choices": null, + "answer": "[0, 2, 0, 2, 1, 7, 1, 2, 0, 3, 0, 6]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 560, + "img_width": 846, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "227": { + "question_id": "227", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 580, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "229": { + "question_id": "229", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest lysine level given?\nChoices:\n(A) 0.33%\n(B) 0.31%\n(C) 0.29%\n(D) 0.32%\n(E) 0.30%", + "choices": [ + "0.33%", + "0.31%", + "0.29%", + "0.32%", + "0.30%" + ], + "answer": "0.30%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.33%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2185, + "img_width": 1683, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "231": { + "question_id": "231", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the overall best ImageNet 10shot Accuracy score across different training steps?\nChoices:\n(A) Identity\n(B) Uniform\n(C) Uniform / Soft\n(D) Soft / Uniform\n(E) Soft\n(F) Dense", + "choices": [ + "Identity", + "Uniform", + "Uniform / Soft", + "Soft / Uniform", + "Soft", + "Dense" + ], + "answer": "Soft", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Identity", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 988, + "img_width": 2002, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "233": { + "question_id": "233", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "235": { + "question_id": "235", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the epigraph of a function f an infinite set?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 266, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "237": { + "question_id": "237", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Red squirrel and deer mouse population were to decrease, what would happen to the deer tick population?\nChoices:\n(A) increase\n(B) fluctuate\n(C) it would decrease\n(D) stay the same", + "choices": [ + "increase", + "fluctuate", + "it would decrease", + "stay the same " + ], + "answer": "it would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 346, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "239": { + "question_id": "239", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 769, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "241": { + "question_id": "241", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the table have a model size larger than 10B?", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1184, + "img_width": 1570, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "243": { + "question_id": "243", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0686\u0646\u062f \u0639\u062f\u062f \u0634\u06cc\u0631\u06cc\u0646\u06cc \u0645\u062b\u0644\u062b\u06cc \u0634\u06a9\u0644 \u062f\u0631 \u062c\u0639\u0628\u0647 \u0627\u0633\u062a\u061f", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1001, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "245": { + "question_id": "245", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Damon need to buy a grilled steak and a mushroom pizza? (Unit: $)", + "choices": null, + "answer": "24", + "extraction": "22", + "prediction": "22", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 259, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "247": { + "question_id": "247", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: A spaceship of mass $m=4.50 \\times 10^3 \\mathrm{~kg}$ is in a circular Earth orbit of radius $r=8.00 \\times 10^6 \\mathrm{~m}$ and period $T_0=118.6 \\mathrm{~min}=$ $7.119 \\times 10^3 \\mathrm{~s}$ when a thruster is fired in the forward direction to decrease the speed to $96.0 \\%$ of the original speed. What is the period $T$ of the resulting elliptical orbit (Figure)?", + "choices": null, + "answer": "6.36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 906, + "img_width": 914, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "249": { + "question_id": "249", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green rubber cubes. Subtract all red matte blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "251": { + "question_id": "251", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green balls. Subtract all shiny things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "253": { + "question_id": "253", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "255": { + "question_id": "255", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2220BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of \u2220PAQ is ()\nChoices:\n(A) 70\u00b0\n(B) 55\u00b0\n(C) 40\u00b0\n(D) 30\u00b0", + "choices": [ + "70\u00b0", + "55\u00b0", + "40\u00b0", + "30\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "70\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 188, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "257": { + "question_id": "257", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u4ee5\u76f4\u89d2\u4e09\u89d2\u5f62\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u4e2d\u4e24\u4e2a\u6b63\u65b9\u5f62\u7684\u9762\u79ef\u5982\u56fe\u6240\u793a\uff0c\u5219\u6b63\u65b9\u5f62A\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 6\n(B) 36\n(C) 64\n(D) 8", + "choices": [ + "6", + "36", + "64", + "8" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "259": { + "question_id": "259", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow metal blocks. Subtract all gray metallic cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "261": { + "question_id": "261", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 345, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "263": { + "question_id": "263", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "38", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 113, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "265": { + "question_id": "265", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Justine's P.E. class participated in a push-up competition, and Justine wrote down how many push-ups each person could do. How many people did at least 60 push-ups? (Unit: people)", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 329, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "267": { + "question_id": "267", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape of a leaf is similar to Serrate, but has smaller, evenly-spaced teeth?\nChoices:\n(A) Undulate\n(B) Sinuate\n(C) Serrulate\n(D) Entire", + "choices": [ + "Undulate", + "Sinuate", + "Serrulate", + "Entire" + ], + "answer": "Serrulate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Undulate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 306, + "img_width": 529, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "269": { + "question_id": "269", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()\nChoices:\n(A) 160m\n(B) 160\u221a{3}m\n(C) (160-160\u221a{3})m\n(D) 360m", + "choices": [ + "160m", + "160\u221a{3}m", + "(160-160\u221a{3})m", + "360m" + ], + "answer": "160\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "160m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 159, + "img_width": 133, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "271": { + "question_id": "271", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y\nChoices:\n(A) 3\n(B) 4.5\n(C) 5\n(D) 6", + "choices": [ + "3", + "4.5", + "5", + "6" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 448, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "273": { + "question_id": "273", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One diagonal of a rhombus is twice as long as the other diagonal. If the area of the rhombus is 169 square millimeters, what are the lengths of the diagonals?\nChoices:\n(A) 6.5\n(B) 13\n(C) 26\n(D) 52", + "choices": [ + "6.5", + "13", + "26", + "52" + ], + "answer": "26", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "275": { + "question_id": "275", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, AD \u22a5 BC at D, DE \u22a5 AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()\nChoices:\n(A) \\frac{21}{2}\n(B) \\frac{\u221a{15}}{2}\n(C) \\frac{9}{2}\n(D) \\frac{3\u221a{5}}{2}", + "choices": [ + "\\frac{21}{2}", + "\\frac{\u221a{15}}{2}", + "\\frac{9}{2}", + "\\frac{3\u221a{5}}{2}" + ], + "answer": "\\frac{3\u221a{5}}{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{21}{2}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 185, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "277": { + "question_id": "277", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cube is identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 591, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "279": { + "question_id": "279", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be directly affected by a decrease in sunlight?\nChoices:\n(A) grass\n(B) mouse\n(C) grasshopper\n(D) owl", + "choices": [ + "grass", + "mouse", + "grasshopper", + "owl" + ], + "answer": "grass", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grass", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "281": { + "question_id": "281", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Was this a square pizza?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "283": { + "question_id": "283", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{WTY} \\cong \\overline{TWY}$. Find $x$.\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 10", + "choices": [ + "2", + "4", + "5", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 416, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "285": { + "question_id": "285", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that AB is the diameter of \u2299O, if the degree of \u2220BOC is 50.0, then the degree of \u2220A is ()\nChoices:\n(A) 50\u00b0\n(B) 40\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "50\u00b0", + "40\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "287": { + "question_id": "287", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is larger? R1 or R2?\nA. R1\nB. R2\nChoices:\n(A) R1\n(B) R2\n(C) R5\n(D) R3\n(E) R4", + "choices": [ + "R1", + "R2", + "R5", + "R3", + "R4" + ], + "answer": "R2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "R1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 370, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "289": { + "question_id": "289", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "291": { + "question_id": "291", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which period the number of full time employees is the maximum?\nChoices:\n(A) Jul '21\n(B) Jun '21\n(C) Mar '21\n(D) May '21\n(E) Apr '21", + "choices": [ + "Jul '21", + "Jun '21", + "Mar '21", + "May '21", + "Apr '21" + ], + "answer": "May '21", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Jul '21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "293": { + "question_id": "293", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, grasshopper population increase if\nChoices:\n(A) grouse decrease\n(B) chipmunk increases\n(C) grasses increases\n(D) elk increase", + "choices": [ + "grouse decrease", + "chipmunk increases", + "grasses increases", + "elk increase" + ], + "answer": "grasses increases", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grouse decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 156, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "295": { + "question_id": "295", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "297": { + "question_id": "297", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green buss greater than the number of blue school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "299": { + "question_id": "299", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1067, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "301": { + "question_id": "301", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model performs the best overall across the three stages in terms of Messenger training performance?\nChoices:\n(A) Dynalang\n(B) EMMA\n(C) R2D2\n(D) IMPALA", + "choices": [ + "Dynalang", + "EMMA", + "R2D2", + "IMPALA" + ], + "answer": "Dynalang", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Dynalang", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 524, + "img_width": 2012, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "303": { + "question_id": "303", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Dim Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 797, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "305": { + "question_id": "305", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "307": { + "question_id": "307", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure is an overhead view of the path taken by a race car driver as his car collides with the racetrack wall. Just before the collision, he is traveling at speed $v_i=70 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $30^{\\circ}$ from the wall. Just after the collision, he is traveling at speed $v_f=50 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $10^{\\circ}$ from the wall. His mass $m$ is $80 \\mathrm{~kg}$. The collision lasts for $14 \\mathrm{~ms}$. What is the magnitude of the average force on the driver during the collision?", + "choices": null, + "answer": "2.58", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 466, + "img_width": 772, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "309": { + "question_id": "309", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The movie critic liked to count the number of actors in each movie he saw. How many movies had at least 30 actors but fewer than 47 actors? (Unit: movies)", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "311": { + "question_id": "311", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1947, + "img_width": 1620, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "313": { + "question_id": "313", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 334, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "315": { + "question_id": "315", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, angle A is congruent to angle BED, and angle C is congruent to angle D. If the ratio of the length of AB to the length of EB is 5:1, and the area of the triangle BED is 5*a^2 + 10, what is the area of triangle ABC?\nChoices:\n(A) 5*a^2 + 10\n(B) 25*a^2 + 50\n(C) 25*a^2 + 100\n(D) 125*a^2 + 250\n(E) cannot be determined", + "choices": [ + "5*a^2 + 10", + "25*a^2 + 50", + "25*a^2 + 100", + "125*a^2 + 250", + "cannot be determined" + ], + "answer": "125*a^2 + 250", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5*a^2 + 10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 463, + "img_width": 749, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "317": { + "question_id": "317", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 361, + "img_width": 496, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "319": { + "question_id": "319", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Would most of the ground cover be considered weeds?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "321": { + "question_id": "321", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $330, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "323": { + "question_id": "323", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Craig just downloaded the new game Gem Excavator on his phone. In the first level, Craig gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Craig's points. Which color gem affects Craig's points less?'\nChoices:\n(A) green\n(B) red", + "choices": [ + "green", + "red" + ], + "answer": "green", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 94, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "325": { + "question_id": "325", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "327": { + "question_id": "327", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 1 units in at least one store?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "329": { + "question_id": "329", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of y at x=6 is ____ that at x=8\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "larger than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "331": { + "question_id": "331", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Several people compared how many Web pages they had visited. What is the mean of the numbers?'", + "choices": null, + "answer": "64", + "extraction": "65", + "prediction": "65", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 246, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "333": { + "question_id": "333", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find tan X\nChoices:\n(A) \\frac { 5 } { 12 }\n(B) \\frac { 12 } { 13 }\n(C) \\frac { 17 } { 12 }\n(D) \\frac { 12 } { 5 }", + "choices": [ + "\\frac { 5 } { 12 }", + "\\frac { 12 } { 13 }", + "\\frac { 17 } { 12 }", + "\\frac { 12 } { 5 }" + ], + "answer": "\\frac { 5 } { 12 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 5 } { 12 }", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 297, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "335": { + "question_id": "335", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large brown matte balls. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "337": { + "question_id": "337", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "339": { + "question_id": "339", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u2299O\u4e2d\uff0cAB=AC\uff0c\u2220BAC\uff1d70\u00b0\uff0c\u5219\u2220AEC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 75\u00b0\n(C) 50\u00b0\n(D) 55\u00b0", + "choices": [ + "65\u00b0", + "75\u00b0", + "50\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 115, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "341": { + "question_id": "341", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is six (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "343": { + "question_id": "343", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple metallic spheres. Subtract all small purple things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "345": { + "question_id": "345", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many kites are there?", + "choices": null, + "answer": "25", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 429, + "img_width": 711, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "347": { + "question_id": "347", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green metallic double buss less than the number of big purple rubber cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "349": { + "question_id": "349", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which capability boasts the highest proportion (%)?\nChoices:\n(A) Rec\n(B) OCR\n(C) Know\n(D) Gen\n(E) Spat\n(F) Math", + "choices": [ + "Rec", + "OCR", + "Know", + "Gen", + "Spat", + "Math" + ], + "answer": "Rec", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rec", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 1348, + "img_width": 1704, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "351": { + "question_id": "351", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer purple rubber objects that are to the left of the red object than tiny matte bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "353": { + "question_id": "353", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: At time $t=0$ a tank contains $Q_0 \\mathrm{lb}$ of salt dissolved in 100 gal of water; see Figure 2.3.1. Assume that water containing $\\frac{1}{4} \\mathrm{lb}$ of salt/gal is entering the tank at a rate of $r \\mathrm{gal} / \\mathrm{min}$ and that the well-stirred mixture is draining from the tank at the same rate. Set up the initial value problem that describes this flow process. By finding the amount of salt $Q(t)$ in the tank at any time, and the limiting amount $Q_L$ that is present after a very long time, if $r=3$ and $Q_0=2 Q_L$, find the time $T$ after which the salt level is within $2 \\%$ of $Q_L$.", + "choices": null, + "answer": "130.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 938, + "img_width": 996, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "355": { + "question_id": "355", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If \u22201 = 50.0, then the degree of \u22202 is ()\nChoices:\n(A) 150\u00b0\n(B) 130\u00b0\n(C) 110\u00b0\n(D) 100\u00b0", + "choices": [ + "150\u00b0", + "130\u00b0", + "110\u00b0", + "100\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "150\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "357": { + "question_id": "357", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "359": { + "question_id": "359", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kylie spent a week at the beach and recorded the number of shells she found each day. According to the table, what was the rate of change between Thursday and Friday? (Unit: shells per day)", + "choices": null, + "answer": "-7", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "361": { + "question_id": "361", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mold are the cylindrical ports located? \nChoices:\n(A) Upper half\n(B) Lower half\n(C) Medial half\n(D) Lateral half", + "choices": [ + "Upper half", + "Lower half", + "Medial half", + "Lateral half" + ], + "answer": "Lower half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Upper half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 435, + "img_width": 596, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "PMC-VQA", + "split": "testmini", + "task": "visual question answering" + }, + "363": { + "question_id": "363", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny gray metal blocks. Subtract all purple things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "365": { + "question_id": "365", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big yellow metallic spheres. Subtract all tiny metal things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "367": { + "question_id": "367", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "14", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 429, + "img_width": 873, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "369": { + "question_id": "369", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) surjective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 266, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "371": { + "question_id": "371", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ABC\uff1d90\u00b0\uff0c\u70b9D\u3001E\u3001F\u5206\u522b\u662f\u8fb9AB\u3001BC\u3001CA\u7684\u4e2d\u70b9\uff0c\u82e5DE+BF\uff1d8\uff0c\u5219BF\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "373": { + "question_id": "373", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the quadrilateral ABCD, \u2220BAD = 120.0, \u2220B = \u2220D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of \u25b3AMN is the smallest, then the degree of \u2220AMN + \u2220ANM is ()\nChoices:\n(A) 110\u00b0\n(B) 120\u00b0\n(C) 140\u00b0\n(D) 150\u00b0", + "choices": [ + "110\u00b0", + "120\u00b0", + "140\u00b0", + "150\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "110\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 122, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "375": { + "question_id": "375", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the length of $AC$ in the isosceles triangle ABC. \nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5", + "choices": [ + "1.5", + "7", + "11", + "12.5" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 293, + "img_width": 703, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "377": { + "question_id": "377", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 649, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "379": { + "question_id": "379", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown what will most directly be affected by the loss of the trees?\nChoices:\n(A) horses\n(B) cats\n(C) nothing\n(D) bears", + "choices": [ + "horses", + "cats", + "nothing", + "bears" + ], + "answer": "horses", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "horses", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 400, + "img_width": 570, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "381": { + "question_id": "381", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny cyan matte articulated buss left of the big school bus than small yellow matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "383": { + "question_id": "383", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What value you get , if you divide the largest bar value by 2 ?", + "choices": null, + "answer": "131253.5", + "extraction": "12.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "385": { + "question_id": "385", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 771, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "387": { + "question_id": "387", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Of the four balls in the photo, what is the percentage of them on the ground?", + "choices": null, + "answer": "100", + "extraction": "75", + "prediction": "75", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 485, + "img_width": 363, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "389": { + "question_id": "389", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $320, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "shortage", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "391": { + "question_id": "391", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, point O is the center of \u2299O, points A, B, and C are on \u2299O, AO \u2225 BC, \u2220AOB = 40.0, then the degree of \u2220OAC is equal to ()\nChoices:\n(A) 40\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "60\u00b0", + "50\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 96, + "img_width": 96, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "393": { + "question_id": "393", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest dark blue bar?", + "choices": null, + "answer": "54", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "395": { + "question_id": "395", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average age of the people in this picture?", + "choices": null, + "answer": "10", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "397": { + "question_id": "397", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001B\u3001C\u90fd\u5728\u534a\u5f84\u4e3a2\u7684\u2299O\u4e0a\uff0c\u2220C\uff1d30\u00b0\uff0c\u5219\u5f26AB\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.2\n(D) 2.5", + "choices": [ + "1", + "2", + "2.2", + "2.5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 70, + "img_width": 73, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "399": { + "question_id": "399", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "401": { + "question_id": "401", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "403": { + "question_id": "403", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find TX if $E X=24$ and $D E=7$\nChoices:\n(A) 7\n(B) 24\n(C) 25\n(D) 32", + "choices": [ + "7", + "24", + "25", + "32" + ], + "answer": "32", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 221, + "img_width": 564, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "405": { + "question_id": "405", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "19", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1351, + "img_width": 1801, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "407": { + "question_id": "407", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9B\uff0cD\uff0cE\uff0cC\u5728\u540c\u4e00\u6761\u76f4\u7ebf\u4e0a\uff0c\u82e5\u25b3ABD\u224c\u25b3ACE\uff0c\u2220AEC\uff1d110\u00b0\uff0c\u5219\u2220DAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 67, + "img_width": 76, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "409": { + "question_id": "409", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the radius of this circle?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 358, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "411": { + "question_id": "411", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average percentage of population having access to electricity per year?", + "choices": null, + "answer": "100", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1081, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "413": { + "question_id": "413", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5df2\u77e5\uff1a\u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAB\uff1dAC\uff0cBD\u4e3a\u2220ABC\u7684\u5e73\u5206\u7ebf\uff0c\u2220BDC\uff1d75\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 123, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "415": { + "question_id": "415", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average annual wage in Slovak Republic in the year 2019", + "choices": null, + "answer": "15017", + "extraction": "12000", + "prediction": "12000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "417": { + "question_id": "417", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 748, + "img_width": 564, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "419": { + "question_id": "419", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "421": { + "question_id": "421", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An elevator cab of mass $m=500 \\mathrm{~kg}$ is descending with speed $v_i=4.0 \\mathrm{~m} / \\mathrm{s}$ when its supporting cable begins to slip, allowing it to fall with constant acceleration $\\vec{a}=\\vec{g} / 5$.\r\nDuring the $12 \\mathrm{~m}$ fall, what is the work $W_T$ done on the cab by the upward pull $\\vec{T}$ of the elevator cable?", + "choices": null, + "answer": "-47", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1190, + "img_width": 550, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "423": { + "question_id": "423", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink less than Dark Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 577, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "425": { + "question_id": "425", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5728Rt\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5AC\uff1d6\uff0cBC\uff1d8\uff0c\u5219cosA\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 0.6\n(B) 0.8\n(C) 0.75\n(D) \\frac{4}{3}", + "choices": [ + "0.6", + "0.8", + "0.75", + "\\frac{4}{3}" + ], + "answer": "0.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 171, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "427": { + "question_id": "427", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "429": { + "question_id": "429", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "431": { + "question_id": "431", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what would happen to dragonfly if all mayfly dies\nChoices:\n(A) remains the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remains the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 297, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "433": { + "question_id": "433", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 350, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "435": { + "question_id": "435", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of employed females who are not attending school greater than the average percentage of employed females who are not attending school taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 955, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "437": { + "question_id": "437", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fig.Q3 shows an excerpt of the transmission phase of a TCP connection. Assume the length of the IP header is 20 bytes. What is the ACK number at message 6?", + "choices": null, + "answer": "839", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 814, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "439": { + "question_id": "439", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is this function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 256, + "img_width": 539, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "441": { + "question_id": "441", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "443": { + "question_id": "443", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure: In Rt\u25b3ABC, \u2220C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}", + "choices": [ + "\\frac{3}{5}", + "\\frac{4}{5}", + "\\frac{3}{4}", + "\\frac{4}{3}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{3}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 80, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "445": { + "question_id": "445", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate less than Saddle Brown?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 436, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "447": { + "question_id": "447", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Midnight Blue intersect Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 685, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "449": { + "question_id": "449", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do the average motorcycle get on the highway?", + "choices": null, + "answer": "40", + "extraction": "50", + "prediction": "50", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "451": { + "question_id": "451", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow metallic choppers that are behind the large cyan thing less than the number of brown metal double buss that are behind the small yellow shiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "453": { + "question_id": "453", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 116, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "455": { + "question_id": "455", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If x = 32 and r = 18, what is the length of the arc shown in the figure above?\nChoices:\n(A) 16*\\pi/5\n(B) 32*\\pi/5\n(C) 36*\\pi\n(D) 288*\\pi/5\n(E) 576*\\pi", + "choices": [ + "16*\\pi/5", + "32*\\pi/5", + "36*\\pi", + "288*\\pi/5", + "576*\\pi" + ], + "answer": "16*\\pi/5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16*\\pi/5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 575, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "457": { + "question_id": "457", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "4525", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 605, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "459": { + "question_id": "459", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large cyan matte balls. Subtract all tiny shiny objects. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "461": { + "question_id": "461", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A perceptual audio codec is used to compress an audio signal. The codec groups every 4 barks into a subband and then allocates bits to different subbands according to the result of a spectrum analysis based on a psychoacoustic model. All samples in the same subband are quantized with the same quantizer, and the bit resolution of which is allocated by the codec. (The Bark scale is a psychoacoustical scale proposed by Eberhard Zwicker in 1961.) Fig. Q1a shows the frequency spectrum of a windowed segment of audio signal. The psychoacoustic model shown in Fig. Q1b is used in the audio codec to derive the masking threshold for the audio segment. How many potential maskers in Fig. Q1a?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 488, + "img_width": 908, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "463": { + "question_id": "463", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray things. Subtract all small brown metallic balls. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "465": { + "question_id": "465", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 628, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "467": { + "question_id": "467", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The degree measures of minor arc $\\widehat{A C}$ and major arc $\\widehat{A D C}$ are $x$ and $y$ respectively. If $m\u2220ABC = 70\u00b0$, find $x$.\nChoices:\n(A) 90\n(B) 100\n(C) 110\n(D) 120", + "choices": [ + "90", + "100", + "110", + "120" + ], + "answer": "110", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "90", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 235, + "img_width": 499, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "469": { + "question_id": "469", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Chartreuse?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "471": { + "question_id": "471", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Lily and her friends recorded their scores while playing a board game. Which score did the greatest number of people receive?'", + "choices": null, + "answer": "8", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "473": { + "question_id": "473", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2604, + "img_width": 2500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "475": { + "question_id": "475", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 71, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "477": { + "question_id": "477", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "479": { + "question_id": "479", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How many times Norway data bigger than Italy data ?", + "choices": null, + "answer": "2.54", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "481": { + "question_id": "481", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 404, + "img_width": 592, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "483": { + "question_id": "483", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is on \u2299O, AE is the tangent of \u2299O, A is the tangent point, connect BC and extend to intersect AE at point D. If \u2220AOC = 80.0, then the degree of \u2220ADB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "20\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 165, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "485": { + "question_id": "485", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9D\u5728\u7b49\u8fb9\u25b3ABC\u7684\u8fb9CB\u7684\u5ef6\u957f\u7ebf\u4e0a\uff0c\u70b9E\u5728\u7ebf\u6bb5BC\u4e0a\uff0c\u8fde\u63a5AD\uff0cAE\uff0c\u82e5DA\uff1dDE\uff0c\u4e14\u2220DAB\uff1d20\u00b0\uff0c\u90a3\u4e48\u2220EAC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 15\u00b0\n(C) 10\u00b0\n(D) 5\u00b0", + "choices": [ + "20\u00b0", + "15\u00b0", + "10\u00b0", + "5\u00b0" + ], + "answer": "10\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 235, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "487": { + "question_id": "487", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big cars behind the small brown shiny mountain bike than tiny objects on the right side of the bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "489": { + "question_id": "489", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For trapezoid ABCD shown above, AB = 24, AD = 23, and BC = 16. What is the length of segment CD?", + "choices": null, + "answer": "25", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 297, + "img_width": 426, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "491": { + "question_id": "491", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 540, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "493": { + "question_id": "493", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function differentiable at every point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 847, + "img_width": 800, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "495": { + "question_id": "495", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer green things in front of the blue metallic car than choppers right of the chopper?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "497": { + "question_id": "497", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "499": { + "question_id": "499", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Quadrilateral $ABDC$ is a rectangle. If $m\\angle1 = 38$, find $m \\angle 2$\nChoices:\n(A) 33\n(B) 38\n(C) 52\n(D) 87", + "choices": [ + "33", + "38", + "52", + "87" + ], + "answer": "52", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 323, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "501": { + "question_id": "501", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red rubber cylinders. Subtract all blue objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "503": { + "question_id": "503", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the center person? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 225, + "img_width": 338, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "505": { + "question_id": "505", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "507": { + "question_id": "507", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen if the hawk population increased?\nChoices:\n(A) mice would increase\n(B) sparrows increased\n(C) garter snakes would decrease\n(D) grass decreased", + "choices": [ + "mice would increase", + "sparrows increased", + "garter snakes would decrease", + "grass decreased" + ], + "answer": "garter snakes would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mice would increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "509": { + "question_id": "509", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 400, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "511": { + "question_id": "511", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "513": { + "question_id": "513", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest value in states that border West Virginia ?\nChoices:\n(A) 43.2%-63.6%\n(B) 45.2%-65.6%\n(C) 42.2%-62.6%\n(D) 41.2%-61.6%\n(E) 44.2%-64.6%", + "choices": [ + "43.2%-63.6%", + "45.2%-65.6%", + "42.2%-62.6%", + "41.2%-61.6%", + "44.2%-64.6%" + ], + "answer": "42.2%-62.6%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "43.2%-63.6%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "515": { + "question_id": "515", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You would potentially see a decrease in which organism if gulls disappeared?\nChoices:\n(A) herring\n(B) kril\n(C) anchovy\n(D) phytoplankton", + "choices": [ + "herring", + "kril", + "anchovy", + "phytoplankton" + ], + "answer": "kril", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "herring", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 549, + "img_width": 398, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "517": { + "question_id": "517", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: At Bloomington Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2014 and 2015? (Unit: employees per year)", + "choices": null, + "answer": "-1", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 275, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "519": { + "question_id": "519", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many Triangles do you see in the picture?", + "choices": null, + "answer": "12", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 852, + "img_width": 948, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "521": { + "question_id": "521", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is a point on \u2299O, \u2220C = 20.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 20\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "20\u00b0", + "30\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 120, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "523": { + "question_id": "523", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, a teaching interest group wants to measure the height of a tree CD. They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()\nChoices:\n(A) 10m\n(B) 5m\n(C) 5\u221a{3}m\n(D) 10\u221a{3}m", + "choices": [ + "10m", + "5m", + "5\u221a{3}m", + "10\u221a{3}m" + ], + "answer": "5\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 285, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "525": { + "question_id": "525", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value shown on the X axis of first plot?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2209, + "img_width": 1711, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "527": { + "question_id": "527", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big shiny cars in front of the red airliner greater than the number of big purple road bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "529": { + "question_id": "529", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what number does the smaller arrow point to?", + "choices": null, + "answer": "1020", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "531": { + "question_id": "531", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to five.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "533": { + "question_id": "533", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small cyan cubes. Subtract all large yellow rubber cubes. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "535": { + "question_id": "535", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "-8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "537": { + "question_id": "537", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of red rubber bicycles less than the number of cyan metal school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "539": { + "question_id": "539", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u3001E\u5206\u522b\u662f\u8fb9AB\u3001BC\u7684\u4e2d\u70b9\uff0c\u82e5\u25b3BDE\u7684\u5468\u957f\u662f6\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u662f\uff08\uff09\nChoices:\n(A) 8\n(B) 10\n(C) 12\n(D) 14", + "choices": [ + "8", + "10", + "12", + "14" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 71, + "img_width": 149, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "541": { + "question_id": "541", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is not identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 560, + "img_width": 280, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "543": { + "question_id": "543", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small purple matte cars than brown matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "545": { + "question_id": "545", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Crimson?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 764, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "547": { + "question_id": "547", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which organisms will be most directly affected by a decrease in the amount of grass?\nChoices:\n(A) Insects\n(B) Hawk and snake\n(C) Snake and raccoon\n(D) Mouse and cricket", + "choices": [ + "Insects", + "Hawk and snake", + "Snake and raccoon", + "Mouse and cricket" + ], + "answer": "Insects", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Insects", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 377, + "img_width": 630, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "549": { + "question_id": "549", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangent to \u2299O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of \u25b3PCD is ()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) \u4e0d\u80fd\u786e\u5b9a", + "choices": [ + "8", + "12", + "16", + "\u4e0d\u80fd\u786e\u5b9a" + ], + "answer": "16", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 192, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "551": { + "question_id": "551", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest tattoos in male and the least in female?", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "553": { + "question_id": "553", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet less than Chocolate?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "555": { + "question_id": "555", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this nest larger than a fist?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "557": { + "question_id": "557", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220BAC\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5206\u522b\u5411\u5916\u4f5c\u7b49\u8fb9\u4e09\u89d2\u5f62\u25b3A'BC\uff0c\u25b3AB'C\uff0c\u25b3ABC'\uff0c\u82e5\u25b3A'BC\uff0c\u25b3AB'C\u7684\u9762\u79ef\u5206\u522b\u662f10\u548c4\uff0c\u5219\u25b3ABC'\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 9", + "choices": [ + "4", + "6", + "8", + "9" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 130, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "559": { + "question_id": "559", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest number shown on the black outer part of the watch?", + "choices": null, + "answer": "55", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "561": { + "question_id": "561", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber double buss right of the small red aeroplane the same as the number of small objects that are left of the tiny gray matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "563": { + "question_id": "563", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number on the monitor is higher?\nChoices:\n(A) top\n(B) bottom\n(C) left\n(D) right", + "choices": [ + "top", + "bottom", + "left", + "right" + ], + "answer": "bottom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "565": { + "question_id": "565", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model can achieve the best ImageNet 10-shot Accuracy score?\nChoices:\n(A) Soft MoE\n(B) Experts Choice\n(C) Tokens Choice\n(D) Dense", + "choices": [ + "Soft MoE", + "Experts Choice", + "Tokens Choice", + "Dense" + ], + "answer": "Soft MoE", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Soft MoE", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 978, + "img_width": 1966, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "567": { + "question_id": "567", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the slug to the nearest inch. The slug is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 252, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "569": { + "question_id": "569", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which subject had the highest pulse rate in baseline period?", + "choices": null, + "answer": "1", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2284, + "img_width": 1786, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "571": { + "question_id": "571", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 613, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "573": { + "question_id": "573", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A race car driver kept track of how many laps he drove in the past 5 days. What is the mode of the numbers?'", + "choices": null, + "answer": "53", + "extraction": "55", + "prediction": "55", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 203, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "575": { + "question_id": "575", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines $l$, $m$, and $n$ are perpendicular bisectors of $\\triangle PQR$ and meet at $T$. If $TQ = 2x$, $PT = 3y - 1$, and $TR = 8$, find $z$.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "577": { + "question_id": "577", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Consider the following matrices:\r\n$$\r\n\\mathbf{A}=\\left(\\begin{array}{rrr}\r\n1 & 2 & -1 \\\\\r\n0 & 3 & 1 \\\\\r\n2 & 0 & 1\r\n\\end{array}\\right), \\quad \\mathbf{B}=\\left(\\begin{array}{rrr}\r\n2 & 1 & 0 \\\\\r\n0 & -1 & 2 \\\\\r\n1 & 1 & 3\r\n\\end{array}\\right), \\quad \\mathbf{C}=\\left(\\begin{array}{ll}\r\n2 & 1 \\\\\r\n4 & 3 \\\\\r\n1 & 0\r\n\\end{array}\\right)\r\n$$\r\nFind $|\\mathbf{A B}|$.", + "choices": null, + "answer": "-104", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 142, + "img_width": 533, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "579": { + "question_id": "579", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of documents required per shipment to export goods in Uganda per year?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1228, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "581": { + "question_id": "581", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large matte cubes. Subtract all matte blocks. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "583": { + "question_id": "583", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth.\r\n\nChoices:\n(A) 5.8\n(B) 6.5\n(C) 14.2\n(D) 44.3", + "choices": [ + "5.8", + "6.5", + "14.2", + "44.3" + ], + "answer": "5.8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.8", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 465, + "img_width": 319, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "585": { + "question_id": "585", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u77e9\u5f62ABCD\u4e2d\uff0cAB\uff1d2\uff0c\u2220AOB\uff1d60\u00b0\uff0c\u5219BD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "4", + "3", + "2", + "2\u221a{3}" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "587": { + "question_id": "587", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()\nChoices:\n(A) 20\u6d77\u91cc\n(B) 20\u221a{2}\u6d77\u91cc\n(C) 15\u6d77\u91cc\n(D) 20\u6d77\u91cc", + "choices": [ + "20\u6d77\u91cc", + "20\u221a{2}\u6d77\u91cc", + "15\u6d77\u91cc", + "20\u6d77\u91cc" + ], + "answer": "20\u221a{2}\u6d77\u91cc", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u6d77\u91cc", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 124, + "img_width": 144, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "589": { + "question_id": "589", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number of things are either large objects behind the shiny double bus or tiny gray metal objects?", + "choices": null, + "answer": "5", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "591": { + "question_id": "591", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 600, + "img_width": 900, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "593": { + "question_id": "593", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average of longest light blue bar and shortest gray bar?", + "choices": null, + "answer": "273", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "595": { + "question_id": "595", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "597": { + "question_id": "597", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "599": { + "question_id": "599", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AC = 6 and BC = 3. Point P lies on line AB between A and B such that line CP is perpendicular to line AB. Which of the following could be the length of line CP?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 7\n(E) 8", + "choices": [ + "2", + "4", + "5", + "7", + "8" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 340, + "img_width": 393, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "601": { + "question_id": "601", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of smallest segment and second largest segment?", + "choices": null, + "answer": "0.33", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 386, + "img_width": 210, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "603": { + "question_id": "603", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP C\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "300", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "605": { + "question_id": "605", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large green matte cubes. Subtract all big green blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "607": { + "question_id": "607", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow shiny things. Subtract all yellow metal things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "609": { + "question_id": "609", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green matte cylinders. Subtract all big brown cubes. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "611": { + "question_id": "611", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A shipping company keeps track of the number of boxes in each shipment they send out. How many shipments had exactly 56 boxes? (Unit: shipments)", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 180, + "img_width": 153, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "613": { + "question_id": "613", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many houses are there?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 87, + "img_width": 473, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "615": { + "question_id": "615", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two sides of a triangle measure 12 and 7, which of the following cannot be the perimeter of the triangle?\nChoices:\n(A) 29\n(B) 34\n(C) 37\n(D) 38", + "choices": [ + "29", + "34", + "37", + "38" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "29", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 195, + "img_width": 522, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "617": { + "question_id": "617", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the normal components of $\\mathbf{a}$.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "619": { + "question_id": "619", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(4)?", + "choices": null, + "answer": "16", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 666, + "img_width": 970, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "621": { + "question_id": "621", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The figure above is composed of 25 small triangles that are congruent and equilateral. If the area of triangle DFH is 10, what is the area of triangle AFK?\nChoices:\n(A) 40\n(B) 42.5\n(C) 50\n(D) 52.5\n(E) 62.5", + "choices": [ + "40", + "42.5", + "50", + "52.5", + "62.5" + ], + "answer": "62.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 315, + "img_width": 397, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "623": { + "question_id": "623", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is twelve (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "625": { + "question_id": "625", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of blue matte school buss greater than the number of large cyan metallic jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "627": { + "question_id": "627", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends played a trivia game and recorded their scores. What is the mode of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 311, + "img_width": 155, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "629": { + "question_id": "629", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the object hut?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "631": { + "question_id": "631", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "633": { + "question_id": "633", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m\u22201 = 123$. Find the measure of $\\angle 14$.\nChoices:\n(A) 47\n(B) 57\n(C) 67\n(D) 123", + "choices": [ + "47", + "57", + "67", + "123" + ], + "answer": "57", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "47", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 330, + "img_width": 361, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "635": { + "question_id": "635", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, E is any point in \u25b1ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 179, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "637": { + "question_id": "637", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfa\u2225b\uff0c\u76f4\u7ebfa\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u5206\u522b\u4ea4\u4e8e\u70b9E\uff0cF\uff0c\u76f4\u7ebfb\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9CB\uff0cCD\u5206\u522b\u4ea4\u4e8e\u70b9G\uff0cH\uff0e\u82e5\u2220AFE\uff1d30\u00b0\uff0c\u5219\u2220DHG\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 100\u00b0\n(B) 110\u00b0\n(C) 120\u00b0\n(D) 130\u00b0", + "choices": [ + "100\u00b0", + "110\u00b0", + "120\u00b0", + "130\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "100\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 166, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "639": { + "question_id": "639", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What does the dial indicate as the top facing number?", + "choices": null, + "answer": "475", + "extraction": "500", + "prediction": "500", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VizWiz", + "split": "testmini", + "task": "visual question answering" + }, + "641": { + "question_id": "641", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: The graph of the concentration function $c(t)$ is shown after a 7-mg injection of dye into a heart. Use Simpson's Rule to estimate the cardiac output.", + "choices": null, + "answer": "5.77", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 420, + "img_width": 828, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "643": { + "question_id": "643", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is the diameter of \u2299O, chord DE \u2225 OA, if the degree of \u2220D is 50.0, then the degree of \u2220C is ()\nChoices:\n(A) 25\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 50\u00b0", + "choices": [ + "25\u00b0", + "30\u00b0", + "40\u00b0", + "50\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 111, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "645": { + "question_id": "645", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAC\uff0cBD\u662f\u83f1\u5f62ABCD\u7684\u5bf9\u89d2\u7ebf\uff0cBH\u22a5AD\u4e8e\u70b9H\uff0c\u82e5AC\uff1d4\uff0cBD\uff1d3\uff0c\u5219BH\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 2.4\n(B) 2.5\n(C) 4.8\n(D) 5", + "choices": [ + "2.4", + "2.5", + "4.8", + "5" + ], + "answer": "2.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 139, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "647": { + "question_id": "647", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the top view.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "B", + "extraction": "B", + "prediction": "B", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 900, + "img_width": 600, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "649": { + "question_id": "649", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many values are below 30 in Mainly are incidents of individual misconduct?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 461, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "651": { + "question_id": "651", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an assignment, Johnny looked at which countries got the most Nobel Prizes in various decades. In the 1990s, how many more Nobel Prize winners did Canada have than Italy? (Unit: Nobel Prize winners)", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 224, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "653": { + "question_id": "653", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there at least three distinct shades of blue in this photo?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "655": { + "question_id": "655", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Russia has the highest transport?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 507, + "img_width": 858, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "657": { + "question_id": "657", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Arkansas have a higher value than Indiana ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "659": { + "question_id": "659", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value of navy blue bar?", + "choices": null, + "answer": "991", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "661": { + "question_id": "661", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1274, + "img_width": 1732, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "663": { + "question_id": "663", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "665": { + "question_id": "665", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $h$ in the triangle.\nChoices:\n(A) 4.62\n(B) 5.66\n(C) 6.93\n(D) 8", + "choices": [ + "4.62", + "5.66", + "6.93", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.62", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 275, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "667": { + "question_id": "667", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has the least difference between the used and new cars?", + "choices": null, + "answer": "2015", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "669": { + "question_id": "669", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 3.5", + "choices": [ + "2", + "2.5", + "3", + "3.5" + ], + "answer": "3.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 18, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "671": { + "question_id": "671", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the semicircle rounded to 2 decimal places?", + "choices": null, + "answer": "14.14", + "extraction": "3.14", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "673": { + "question_id": "673", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large green cars less than the number of brown rubber double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "675": { + "question_id": "675", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()\nChoices:\n(A) 13m\n(B) 34m\n(C) (6+14\u221a{3})m\n(D) 40m", + "choices": [ + "13m", + "34m", + "(6+14\u221a{3})m", + "40m" + ], + "answer": "34m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "13m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 83, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "677": { + "question_id": "677", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of dirtbikes right of the large blue object less than the number of small green metallic cars in front of the tiny matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "679": { + "question_id": "679", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of \u25b3OAB is ()\nChoices:\n(A) 15\n(B) 17\n(C) 21\n(D) 27", + "choices": [ + "15", + "17", + "21", + "27" + ], + "answer": "17", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 73, + "img_width": 173, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "681": { + "question_id": "681", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the largest city in the nation where this plane is headquartered?\nChoices:\n(A) hong kong\n(B) osaka\n(C) shanghai\n(D) tokyo", + "choices": [ + "hong kong", + "osaka", + "shanghai", + "tokyo" + ], + "answer": "tokyo", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "hong kong", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "683": { + "question_id": "683", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 157, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "685": { + "question_id": "685", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to organism c if organism b increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't predict\n(D) stay same", + "choices": [ + "decrease", + "increase", + "can't predict", + "stay same" + ], + "answer": "increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 246, + "img_width": 574, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "687": { + "question_id": "687", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen that would increase the number of krill?\nChoices:\n(A) increase in phytoplankton\n(B) decrease in penguins\n(C) increase in fish\n(D) increase in birds", + "choices": [ + "increase in phytoplankton", + "decrease in penguins", + "increase in fish", + "increase in birds" + ], + "answer": "increase in phytoplankton", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase in phytoplankton", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 396, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "689": { + "question_id": "689", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are these people sitting in a circle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "691": { + "question_id": "691", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "256", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 500, + "img_width": 596, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "693": { + "question_id": "693", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the orange larger than the car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "695": { + "question_id": "695", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Orchid?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 734, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "697": { + "question_id": "697", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, \u2220B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()\nChoices:\n(A) 12\n(B) 18\n(C) 27\n(D) 54", + "choices": [ + "12", + "18", + "27", + "54" + ], + "answer": "27", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 205, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "699": { + "question_id": "699", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2684, + "img_width": 4577, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "701": { + "question_id": "701", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 109, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "703": { + "question_id": "703", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of highest value and lowest value of navy blue bar?", + "choices": null, + "answer": "2372.1", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "705": { + "question_id": "705", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the heart wider than more than half the width of the thorax?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 512, + "img_width": 419, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "707": { + "question_id": "707", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0ca\u2225b\uff0c\u22201\uff1d60\u00b0\uff0c\u5219\u22202\u7684\u5927\u5c0f\u662f\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 120\u00b0", + "choices": [ + "60\u00b0", + "80\u00b0", + "100\u00b0", + "120\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "709": { + "question_id": "709", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "711": { + "question_id": "711", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 270, + "img_width": 369, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "713": { + "question_id": "713", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 7", + "choices": [ + "3", + "4", + "6", + "7" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 422, + "img_width": 521, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "715": { + "question_id": "715", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "717": { + "question_id": "717", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is \\int_1^{\\infty} {1\\over x^{0.99}} dx finite according to this graph ?\n\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 314, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "719": { + "question_id": "719", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Brenda graphed the daily low temperature for 5 days. What is the range of the numbers?'", + "choices": null, + "answer": "13", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "721": { + "question_id": "721", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many odd functions are in the graph?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 297, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "723": { + "question_id": "723", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 277, + "img_width": 468, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "725": { + "question_id": "725", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In Figure, suppose that Barbara's velocity relative to Alex is a constant $v_{B A}=52 \\mathrm{~km} / \\mathrm{h}$ and car $P$ is moving in the negative direction of the $x$ axis.\r\n(a) If Alex measures a constant $v_{P A}=-78 \\mathrm{~km} / \\mathrm{h}$ for car $P$, what velocity $v_{P B}$ will Barbara measure?", + "choices": null, + "answer": "-130", + "extraction": "-26", + "prediction": "-26", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 690, + "img_width": 976, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "727": { + "question_id": "727", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the largest and the smallest value in the chart?", + "choices": null, + "answer": "70", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "729": { + "question_id": "729", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "731": { + "question_id": "731", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The train conductor made sure to count the number of passengers on each train. What is the smallest number of passengers? (Unit: passengers)", + "choices": null, + "answer": "40", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 159, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "733": { + "question_id": "733", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Square ABCD. CT: tangent to semicircle. Find the angle \u2220CTD. Return the numeric value.", + "choices": null, + "answer": "63.4", + "extraction": "45.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1018, + "img_width": 972, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "735": { + "question_id": "735", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan things in front of the cyan rubber suv less than the number of big suvs that are behind the red bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "737": { + "question_id": "737", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 32\n(B) 39\n(C) 46\n(D) 78", + "choices": [ + "32", + "39", + "46", + "78" + ], + "answer": "78", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "739": { + "question_id": "739", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hannah need to buy a baking dish and a cookie jar? (Unit: $)", + "choices": null, + "answer": "23", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 201, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "741": { + "question_id": "741", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1080, + "img_width": 1920, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "743": { + "question_id": "743", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the different between the highest unemployment rate and the lowest?", + "choices": null, + "answer": "10.53", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "745": { + "question_id": "745", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2832, + "img_width": 4256, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "747": { + "question_id": "747", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot M$, $FL=24,HJ=48$, and $m \\widehat {HP}=65$. Find $m \\widehat {HJ}$.\nChoices:\n(A) 65\n(B) 120\n(C) 130\n(D) 155", + "choices": [ + "65", + "120", + "130", + "155" + ], + "answer": "130", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 467, + "img_width": 507, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "749": { + "question_id": "749", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()\nChoices:\n(A) \\frac{15}{4}cm\n(B) \\frac{20}{3}cm\n(C) \\frac{15}{7}cm\n(D) \\frac{20}{7}cm", + "choices": [ + "\\frac{15}{4}cm", + "\\frac{20}{3}cm", + "\\frac{15}{7}cm", + "\\frac{20}{7}cm" + ], + "answer": "\\frac{20}{7}cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{15}{4}cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 181, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "751": { + "question_id": "751", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely happen if Artemia was removed?\nChoices:\n(A) Seahorses would decrease\n(B) Rotifers would decrease\n(C) Mysids would decrease\n(D) Algae would decrease", + "choices": [ + "Seahorses would decrease", + "Rotifers would decrease", + "Mysids would decrease", + "Algae would decrease" + ], + "answer": "Seahorses would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Seahorses would decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 363, + "img_width": 862, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "753": { + "question_id": "753", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "755": { + "question_id": "755", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a polynomial", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "757": { + "question_id": "757", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 7.2\n(B) 8\n(C) 12\n(D) 15", + "choices": [ + "7.2", + "8", + "12", + "15" + ], + "answer": "7.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 220, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "759": { + "question_id": "759", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 201, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "761": { + "question_id": "761", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the crayfish population if the Largemouth Bass and Northern Pike populations decrease?\nChoices:\n(A) Nothing\n(B) Decrease\n(C) Slightly Decrease\n(D) Increase", + "choices": [ + "Nothing", + "Decrease", + "Slightly Decrease", + "Increase" + ], + "answer": "Increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Nothing", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 319, + "img_width": 405, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "763": { + "question_id": "763", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny shiny balls. Subtract all purple objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "765": { + "question_id": "765", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "767": { + "question_id": "767", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of y?", + "choices": null, + "answer": "5", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 429, + "img_width": 483, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "769": { + "question_id": "769", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each blue ball represents one particle of solute. Which solution has a higher concentration of blue particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B", + "choices": [ + "neither; their concentrations are the same", + "Solution A", + "Solution B" + ], + "answer": "Solution A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "771": { + "question_id": "771", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram of a food chain below and on your knowledge of science. If the population of snakes increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) None", + "choices": [ + "decrease", + "remain the same", + "increase", + "None" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "773": { + "question_id": "773", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point D is on the extended line of AB, passing point D is the tangent of \u2299O, and the tangent point is C, if \u2220A = 25.0, then \u2220D = ()\nChoices:\n(A) 25\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 65\u00b0", + "choices": [ + "25\u00b0", + "40\u00b0", + "50\u00b0", + "65\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 163, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "775": { + "question_id": "775", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 724, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "777": { + "question_id": "777", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In rhombus LMPQ, $m \\angle Q L M=2 x^{2}-10$, $m \\angle Q P M=8 x$, and $M P=10$ . \r\nFind the perimeter of $LMPQ$\nChoices:\n(A) 10\n(B) 40\n(C) 70\n(D) 140", + "choices": [ + "10", + "40", + "70", + "140" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 177, + "img_width": 337, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "779": { + "question_id": "779", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the cardiac silhouette less than half the diameter of the diaphragm?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 841, + "img_width": 1023, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "781": { + "question_id": "781", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\triangle CDF$, $K$ is the centroid and $DK=16$. Find $CD$.\nChoices:\n(A) 9\n(B) 12\n(C) 18\n(D) 18", + "choices": [ + "9", + "12", + "18", + "18" + ], + "answer": "18", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 540, + "img_width": 461, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "783": { + "question_id": "783", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In order to measure the width of parallel river AB, \u2220ACB = 30.0, \u2220ADB = 60.0, CD = 60.0, then the width of the river AB is ()\nChoices:\n(A) 30m\n(B) 30\u221a{3}m\n(C) (30\u221a{3}+30)m\n(D) (30\u221a{3}-30)m", + "choices": [ + "30m", + "30\u221a{3}m", + "(30\u221a{3}+30)m", + "(30\u221a{3}-30)m" + ], + "answer": "30\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "785": { + "question_id": "785", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Part of an ecosystem is shown in this diagram. Imagine the algae and floating plants are prevented from growing. How will that most likely affect this ecosystem?\nChoices:\n(A) The number of ducks will increase\n(B) The number of minnows will increase\n(C) There will be no effect on this ecosystem\n(D) The number of aquatic crustaceans will decrease", + "choices": [ + "The number of ducks will increase", + "The number of minnows will increase", + "There will be no effect on this ecosystem", + "The number of aquatic crustaceans will decrease" + ], + "answer": "The number of aquatic crustaceans will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The number of ducks will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "787": { + "question_id": "787", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of the zebra's stripes are horizontal?", + "choices": null, + "answer": "50", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "789": { + "question_id": "789", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the values of posse and mortar?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "791": { + "question_id": "791", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Given $V_s$ = 5V, $R_1$ = 1k\u03a9, $R_2$ = 2.2k\u03a9, $R_3$ = 2.2k\u03a9, $R_4$ = 1.5k\u03a9, and $R_L$ = 4.7k\u03a9. Determine the voltage and current across $R_L$. Answer in unit of V (3 sig.fig.).", + "choices": null, + "answer": "1.06", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 400, + "img_width": 444, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "793": { + "question_id": "793", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest Elo score for the agent using an offline RL algorithm?", + "choices": null, + "answer": "1578", + "extraction": "178", + "prediction": "178", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1922, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "795": { + "question_id": "795", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "75", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 601, + "img_width": 475, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "797": { + "question_id": "797", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing pattern in the picture?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 291, + "img_width": 386, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "799": { + "question_id": "799", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Ruth need to buy a baking dish, a casserole dish, and an ice cream scoop? (Unit: $)", + "choices": null, + "answer": "13", + "extraction": "13", + "prediction": "13", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 229, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "801": { + "question_id": "801", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?'", + "choices": null, + "answer": "10", + "extraction": "9", + "prediction": "9", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 272, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "803": { + "question_id": "803", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "805": { + "question_id": "805", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the donut more than half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 434, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "807": { + "question_id": "807", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaf shapes would have the least amount of wind resistance and water loss?\nChoices:\n(A) Truncate\n(B) Acuminate\n(C) Rounded\n(D) Sagittate", + "choices": [ + "Truncate", + "Acuminate", + "Rounded", + "Sagittate" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Truncate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 508, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "809": { + "question_id": "809", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a group of horses, some individuals have a black coat and others have a reddish-brown coat. In this group, the gene for the coat color trait has two alleles. The allele for a black coat (L) is dominant over the allele for a reddish-brown coat (l).\nThis Punnett square shows a cross between two horses. What is the expected ratio of offspring with a reddish-brown coat to offspring with a black coat? Choose the most likely ratio.\nChoices:\n(A) 1:3\n(B) 4:0\n(C) 3:1\n(D) 0:4\n(E) 2:2", + "choices": [ + "1:3", + "4:0", + "3:1", + "0:4", + "2:2" + ], + "answer": "2:2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1:3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 241, + "img_width": 233, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "811": { + "question_id": "811", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A machine at the candy factory dispensed different numbers of lemon-flavored candies into various bags. What is the smallest number of lemon-flavored candies? (Unit: lemon-flavored candies)", + "choices": null, + "answer": "34", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "813": { + "question_id": "813", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value on the X axis?", + "choices": null, + "answer": "30", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2264, + "img_width": 1768, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "815": { + "question_id": "815", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle N C L$\nChoices:\n(A) 60\n(B) 120\n(C) 240\n(D) 360", + "choices": [ + "60", + "120", + "240", + "360" + ], + "answer": "120", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 279, + "img_width": 367, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "817": { + "question_id": "817", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight line a \u2225 b, the point B is on the straight line b, and AB \u22a5 BC, \u22202 = 65.0, then the degree of \u22201 is ()\nChoices:\n(A) 65\u00b0\n(B) 25\u00b0\n(C) 35\u00b0\n(D) 45\u00b0", + "choices": [ + "65\u00b0", + "25\u00b0", + "35\u00b0", + "45\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 171, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "819": { + "question_id": "819", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of $t$ in the parallelogram.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 13", + "choices": [ + "6", + "7", + "8", + "13" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 400, + "img_width": 428, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "821": { + "question_id": "821", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most of the people young men?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 360, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "823": { + "question_id": "823", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You can see how organisms are interconnected from the diagram given. What will be the effect if all the Killer whales are removed?\nChoices:\n(A) The population of tuna will increase\n(B) Mouse will decrease in number\n(C) The phytoplankton will decrease\n(D) The grasshopper will die", + "choices": [ + "The population of tuna will increase", + "Mouse will decrease in number", + "The phytoplankton will decrease", + "The grasshopper will die" + ], + "answer": "The population of tuna will increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of tuna will increase", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "825": { + "question_id": "825", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metallic road bikes that are behind the large bus less than the number of small matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "827": { + "question_id": "827", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1138, + "img_width": 828, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "829": { + "question_id": "829", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which matchstick needs to be moved in order to create a square?\nChoices:\n(A) Top\n(B) Bottom\n(C) Left\n(D) Right\n(E) Not possible", + "choices": [ + "Top", + "Bottom", + "Left", + "Right", + "Not possible" + ], + "answer": "Left", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 396, + "img_width": 378, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "831": { + "question_id": "831", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An author recorded how many words she wrote in the past 3 days. How many words in total did the author write on Thursday and Friday? (Unit: words)", + "choices": null, + "answer": "679", + "extraction": "635", + "prediction": "635", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 236, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "833": { + "question_id": "833", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Phenylalanine (Phe, 5) is a naturally occurring amino acid. What is the energy of interaction between its phenyl group and the electric dipole moment of a neighbouring peptide group? Take the distance between the groups as $4.0 \\mathrm{~nm}$ and treat the phenyl group as a benzene molecule. The magnitude of the dipole moment of the peptide group is $\\mu=1.3 \\mathrm{D}$ and the polarizability volume of benzene is $\\alpha^{\\prime}=1.04 \\times 10^{-29} \\mathrm{~m}^3$.", + "choices": null, + "answer": "-4.3", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 372, + "img_width": 474, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "835": { + "question_id": "835", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of people are wearing blue?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "837": { + "question_id": "837", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red motorbikes than big red choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "839": { + "question_id": "839", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many years have value less than 10%?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "841": { + "question_id": "841", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends compared the sizes of their stuffed animal collections. What is the median of the numbers?'", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 265, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "843": { + "question_id": "843", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Red?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 752, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "845": { + "question_id": "845", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 390, + "img_width": 550, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "847": { + "question_id": "847", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which function grows the fastest as x increases?\nChoices:\n(A) red\n(B) purple\n(C) blue", + "choices": [ + "red", + "purple", + "blue" + ], + "answer": "red", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "red", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1294, + "img_width": 1706, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "849": { + "question_id": "849", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The 4 8x8 images shown below are encoded with JPEG coding. Based on their expected DCT (Discrete Cosine Transform) coefficients, Which image has the most non-zero AC coefficients? (a): Image A, (b): Image B, (c): Image C, (d): Image D.\nChoices:\n(A) (c)\n(B) (d)\n(C) (a)\n(D) (b)\n(E) (e)", + "choices": [ + "(c)", + "(d)", + "(a)", + "(b)", + "(e)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 282, + "img_width": 940, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "851": { + "question_id": "851", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the net concessional disbursements from imf greater than 32000000 US$?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1139, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "853": { + "question_id": "853", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, \u2220BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12", + "choices": [ + "3", + "6", + "9", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "855": { + "question_id": "855", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$ so that $a \u2225 b$.\nChoices:\n(A) 2.5\n(B) 14\n(C) 15\n(D) 16", + "choices": [ + "2.5", + "14", + "15", + "16" + ], + "answer": "14", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 536, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "857": { + "question_id": "857", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "859": { + "question_id": "859", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "27", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 603, + "img_width": 750, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "861": { + "question_id": "861", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 680, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "863": { + "question_id": "863", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rhode Island have the lowest value in the USA ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "865": { + "question_id": "865", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Hot Pink have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 512, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "867": { + "question_id": "867", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A food industry researcher compiled the revenues of several pizzerias. How much did Dan's Deep Dish make from pizza sales? (Unit: $)", + "choices": null, + "answer": "22", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 465, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "869": { + "question_id": "869", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow matte cubes. Subtract all metal things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "871": { + "question_id": "871", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 200, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "873": { + "question_id": "873", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 40?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "875": { + "question_id": "875", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow things. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "877": { + "question_id": "877", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms squad and warm?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "879": { + "question_id": "879", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray rubber things. Subtract all small blue spheres. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "881": { + "question_id": "881", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the population of grasshopper decreases, the population of mouse will most likely do what?\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remain the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "883": { + "question_id": "883", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "15", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 207, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "885": { + "question_id": "885", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Grayson counted the number of pieces of pepperoni on each pizza he made. What is the smallest number of pieces of pepperoni? (Unit: pieces of pepperoni)", + "choices": null, + "answer": "18", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "887": { + "question_id": "887", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O. If \u2220ABC = 70.0, then the degree of \u2220AOC is equal to ()\nChoices:\n(A) 140\u00b0\n(B) 130\u00b0\n(C) 120\u00b0\n(D) 110\u00b0", + "choices": [ + "140\u00b0", + "130\u00b0", + "120\u00b0", + "110\u00b0" + ], + "answer": "140\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "140\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 106, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "889": { + "question_id": "889", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 472, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "891": { + "question_id": "891", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "893": { + "question_id": "893", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the blue function as x approaches negative infinity?", + "choices": null, + "answer": "0", + "extraction": "-4", + "prediction": "-4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "895": { + "question_id": "895", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the lowest Audio-Audio Similarity and Text-Audio Similarity scores overall?\nChoices:\n(A) MusicLDM (mix-up)\n(B) MusicLDM (original)\n(C) MusicLDM (BLM)\n(D) MusicLDM (BAM)\n(E) MuBERT", + "choices": [ + "MusicLDM (mix-up)", + "MusicLDM (original)", + "MusicLDM (BLM)", + "MusicLDM (BAM)", + "MuBERT" + ], + "answer": "MuBERT", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "MusicLDM (mix-up)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "violin plot", + "grade": "college", + "img_height": 682, + "img_width": 1882, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "897": { + "question_id": "897", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a calculator to find the measure of $\u2220J$ to the nearest degree.\nChoices:\n(A) 33\n(B) 40\n(C) 50\n(D) 57", + "choices": [ + "33", + "40", + "50", + "57" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 223, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "899": { + "question_id": "899", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number comes next?", + "choices": null, + "answer": "2123", + "extraction": "1357", + "prediction": "1357", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 185, + "img_width": 406, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "901": { + "question_id": "901", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all shiny spheres. Subtract all big red matte spheres. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "903": { + "question_id": "903", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if \u2220ABC = 30.0, then the degree of \u2220AOC is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "905": { + "question_id": "905", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large red cars behind the metal car less than the number of blue matte tandem bikes that are behind the big blue rubber utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "907": { + "question_id": "907", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When the military expenditure value was lower than 0.2%?", + "choices": null, + "answer": "1970", + "extraction": "1970", + "prediction": "1970", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "909": { + "question_id": "909", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AD = 1.0, DB = 2.0, then the value of \\frac ADAB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{1}{4}\n(C) \\frac{1}{3}\n(D) \\frac{1}{2}", + "choices": [ + "\\frac{2}{3}", + "\\frac{1}{4}", + "\\frac{1}{3}", + "\\frac{1}{2}" + ], + "answer": "\\frac{1}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 132, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "911": { + "question_id": "911", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the smaller picture below the larger picture?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "913": { + "question_id": "913", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 763, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "915": { + "question_id": "915", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the Lion population if the Gum Tree population decreased?\nChoices:\n(A) Unable to determine.\n(B) Nothing would happen.\n(C) It would also decrease.\n(D) It would increase.", + "choices": [ + "Unable to determine.", + "Nothing would happen.", + "It would also decrease.", + "It would increase." + ], + "answer": "It would also decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Unable to determine.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 740, + "img_width": 528, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "917": { + "question_id": "917", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of the number of procedures to register a business in 2004 to that in 2007?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 939, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "919": { + "question_id": "919", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold more than 3 units in at least one store?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "921": { + "question_id": "921", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 5\n(B) 8.1\n(C) 10.3\n(D) 21.6", + "choices": [ + "5", + "8.1", + "10.3", + "21.6" + ], + "answer": "21.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 170, + "img_width": 226, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "923": { + "question_id": "923", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model achieves the highest score in terms of Rec?\nChoices:\n(A) Transformers Agent (GPT-4)\n(B) LLaMA-Adapter v2-7B\n(C) LLaVA-7B\n(D) Otter-9B \n(E) MM-ReAct-GPT-3.5\n(F) LLaVA-13B (LLaMA-2)\n(G) MM-ReAct-GPT-4", + "choices": [ + "Transformers Agent (GPT-4)", + "LLaMA-Adapter v2-7B", + "LLaVA-7B", + "Otter-9B ", + "MM-ReAct-GPT-3.5", + "LLaVA-13B (LLaMA-2)", + "MM-ReAct-GPT-4" + ], + "answer": "LLaVA-13B (LLaMA-2)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Transformers Agent (GPT-4)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1910, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "925": { + "question_id": "925", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Haley went to the store. She bought 3+9/10 pounds of pumpernickel bread crumbs. How much did she spend? (Unit: $)", + "choices": null, + "answer": "19.5", + "extraction": "15.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 130, + "img_width": 334, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "927": { + "question_id": "927", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAB\u7684\u5782\u76f4\u5e73\u5206\u7ebf\u4ea4AB\u4e8e\u70b9D\uff0c\u4ea4BC\u4e8e\u70b9E\uff0c\u8fde\u63a5AE\uff0e\u82e5AB\uff1d6\uff0c\u25b3ACE\u7684\u5468\u957f\u4e3a13\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 19\n(B) 16\n(C) 29\n(D) 18", + "choices": [ + "19", + "16", + "29", + "18" + ], + "answer": "19", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "19", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 152, + "img_width": 199, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "929": { + "question_id": "929", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Tim need to buy a mystery game and a toy rocket? (Unit: $)", + "choices": null, + "answer": "85", + "extraction": "32", + "prediction": "32", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 226, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "931": { + "question_id": "931", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O, AB is the diameter of \u2299O, point D is a point on \u2299O, if \u2220ACD = 40.0, then the size of \u2220BAD is ()\nChoices:\n(A) 35\u00b0\n(B) 50\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "35\u00b0", + "50\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "933": { + "question_id": "933", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hector need to buy a European vacation package and an Australian vacation package? (Unit: $)", + "choices": null, + "answer": "9606", + "extraction": "1696", + "prediction": "1696", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 344, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "935": { + "question_id": "935", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAD\uff1d6\uff0cAB\uff1d4\uff0cDE\u5e73\u5206\u2220ADC\u4ea4BC\u4e8e\u70b9E\uff0c\u5219BE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 140, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "937": { + "question_id": "937", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "939": { + "question_id": "939", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be most affected if the clams all died?\nChoices:\n(A) squid\n(B) lantern fish\n(C) octopus\n(D) sea horse", + "choices": [ + "squid", + "lantern fish", + "octopus", + "sea horse" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "squid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "941": { + "question_id": "941", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which is the next number in the series?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 327, + "img_width": 271, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "943": { + "question_id": "943", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1258, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "945": { + "question_id": "945", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 451, + "img_width": 610, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "947": { + "question_id": "947", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBC\u2225DE\uff0c\u2220A\uff1d45\u00b0\uff0c\u2220C\uff1d110\u00b0\uff0c\u5219\u2220AED\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 95\u00b0\n(B) 105\u00b0\n(C) 115\u00b0\n(D) 125\u00b0", + "choices": [ + "95\u00b0", + "105\u00b0", + "115\u00b0", + "125\u00b0" + ], + "answer": "115\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "95\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "949": { + "question_id": "949", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the combined percentage of Lowest ROI and Medium ROI in SEO?", + "choices": null, + "answer": "56", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "951": { + "question_id": "951", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 10.25\n(B) 12.75\n(C) 18.75\n(D) 25.5", + "choices": [ + "10.25", + "12.75", + "18.75", + "25.5" + ], + "answer": "12.75", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10.25", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 427, + "img_width": 487, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "953": { + "question_id": "953", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of trees have leaves?", + "choices": null, + "answer": "50", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "955": { + "question_id": "955", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0e\u70b9O\u662f\u6b63\u4e94\u8fb9\u5f62ABCDE\u7684\u4e2d\u5fc3\uff0c\u2299O\u662f\u6b63\u4e94\u8fb9\u5f62\u7684\u5916\u63a5\u5706\uff0c\u2220ADE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 32\u00b0\n(C) 36\u00b0\n(D) 40\u00b0", + "choices": [ + "30\u00b0", + "32\u00b0", + "36\u00b0", + "40\u00b0" + ], + "answer": "36\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 136, + "img_width": 136, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "957": { + "question_id": "957", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big brown buss behind the gray matte aeroplane greater than the number of yellow shiny scooters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "959": { + "question_id": "959", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The teachers at an elementary school counted how many desks they had in their classrooms. What is the median of the numbers?'", + "choices": null, + "answer": "32", + "extraction": "32", + "prediction": "32", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "961": { + "question_id": "961", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value in blue bar?", + "choices": null, + "answer": "7", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "963": { + "question_id": "963", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For what x does f reach its local maximum?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 397, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "965": { + "question_id": "965", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: whats the lowest number yard line that you can see?", + "choices": null, + "answer": "30", + "extraction": "30", + "prediction": "30", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 690, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "967": { + "question_id": "967", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the amount earned from national visitors greater than the average amount earned from national visitors taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1146, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "969": { + "question_id": "969", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "971": { + "question_id": "971", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the boy reach the highest book?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "973": { + "question_id": "973", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many zeros does this function have?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "975": { + "question_id": "975", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte objects. Subtract all blue metallic objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "977": { + "question_id": "977", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5AB\u2225CD\uff0cAF\u4e0eCD\u4ea4\u4e8e\u70b9E\uff0cBE\u22a5AF\uff0c\u2220B\uff1d65\u00b0\uff0c\u5219\u2220DEF\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 5\u00b0\n(C) 15\u00b0\n(D) 25\u00b0", + "choices": [ + "65\u00b0", + "5\u00b0", + "15\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "979": { + "question_id": "979", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9079", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "981": { + "question_id": "981", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of 2002, 2003 and 2004?", + "choices": null, + "answer": "70.4", + "extraction": "6006.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "983": { + "question_id": "983", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest accuracy reported in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "985": { + "question_id": "985", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the smallest percentage value recorded in the chart?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "987": { + "question_id": "987", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A cross-section of an airplane wing is shown. Measurements of the thickness of the wing, in centimeters, at 20-centimeter intervals are 5.8, 20.3, 26.7, 29.0, 27.6, 27.3, 23.8, 20.5, 15.1, 8.7, and 2.8. Use the Midpoint Rule to estimate the area of the wing's cross-section.", + "choices": null, + "answer": "4232", + "extraction": "200", + "prediction": "200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 338, + "img_width": 946, + "language": "english", + "skills": [ + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "989": { + "question_id": "989", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "991": { + "question_id": "991", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which functions have no zeros?\nChoices:\n(A) green and red\n(B) blue and orange\n(C) purple and grey", + "choices": [ + "green and red", + "blue and orange", + "purple and grey" + ], + "answer": "purple and grey", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green and red", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 383, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "993": { + "question_id": "993", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny yellow matte cylinders. Subtract all small blue rubber spheres. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "995": { + "question_id": "995", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the sum of the two bars in 18 to 21?", + "choices": null, + "answer": "18.27", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "997": { + "question_id": "997", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is kx^2/2 larger than E at x=0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 197, + "img_width": 537, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "999": { + "question_id": "999", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the tangential components of $\\mathbf{a}$.", + "choices": null, + "answer": "4.5", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "2": { + "question_id": "2", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the total volume of the measuring cup? (Unit: g)", + "choices": null, + "answer": "1000", + "extraction": "1000", + "prediction": "1000", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "4": { + "question_id": "4", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large rubber spheres. Subtract all big shiny cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "6": { + "question_id": "6", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()\nChoices:\n(A) 6cm\n(B) 7cm\n(C) 4cm\n(D) 5cm", + "choices": [ + "6cm", + "7cm", + "4cm", + "5cm" + ], + "answer": "6cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 30, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "8": { + "question_id": "8", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny gray bicycles that are on the left side of the brown metal sedan greater than the number of things that are to the left of the tiny green bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "10": { + "question_id": "10", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "E", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 418, + "img_width": 376, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "12": { + "question_id": "12", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer metallic fighters than rubber objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "14": { + "question_id": "14", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "16": { + "question_id": "16", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 5 units in at least one store?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "18": { + "question_id": "18", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The passage below describes an experiment. Read the passage and then follow the instructions below.\n\nLinda applied a thin layer of wax to the underside of her snowboard and rode the board straight down a hill. Then, she removed the wax and rode the snowboard straight down the hill again. She repeated the rides four more times, alternating whether she rode with a thin layer of wax on the board or not. Her friend Bob timed each ride. Linda and Bob calculated the average time it took to slide straight down the hill on the snowboard with wax compared to the average time on the snowboard without wax.\nFigure: snowboarding down a hill. Identify the question that Linda and Bob's experiment can best answer.\nChoices:\n(A) Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?\n(B) Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "choices": [ + "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?" + ], + "answer": "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "elementary school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "20": { + "question_id": "20", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "22": { + "question_id": "22", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 785, + "img_width": 555, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "24": { + "question_id": "24", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 709, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "26": { + "question_id": "26", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black greater than Deep Sky Blue?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 761, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "28": { + "question_id": "28", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{AB}$ is a diameter, $AC=8$ inches, and $BC=15$ inches. Find the radius of the circle.\nChoices:\n(A) 7.5\n(B) 8\n(C) 8.5\n(D) 17", + "choices": [ + "7.5", + "8", + "8.5", + "17" + ], + "answer": "8.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 431, + "img_width": 519, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "30": { + "question_id": "30", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the two chords AB and CD in the circle intersect at E, \u2220D = 35.0, \u2220AEC = 105.0, then \u2220C = ()\nChoices:\n(A) 60\u00b0\n(B) 70\u00b0\n(C) 80\u00b0\n(D) 85\u00b0", + "choices": [ + "60\u00b0", + "70\u00b0", + "80\u00b0", + "85\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "32": { + "question_id": "32", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1dAC\uff0c\u2220CAB\uff1d40\u00b0\uff0c\u5219\u2220D\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 168, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "34": { + "question_id": "34", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous at each point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "36": { + "question_id": "36", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "38": { + "question_id": "38", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 6?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "40": { + "question_id": "40", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown blocks. Subtract all large blue rubber things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "42": { + "question_id": "42", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 539, + "img_width": 401, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "44": { + "question_id": "44", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend? (Unit: $)", + "choices": null, + "answer": "18", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 226, + "img_width": 305, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "46": { + "question_id": "46", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the population of adult spiders if predator ate all the spider eggs?\nChoices:\n(A) Adult spider population would remain the same\n(B) Adult spider population would double.\n(C) Adults spider population would decrease\n(D) Adult spider population would increase.", + "choices": [ + "Adult spider population would remain the same", + "Adult spider population would double.", + "Adults spider population would decrease", + "Adult spider population would increase." + ], + "answer": "Adults spider population would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Adult spider population would remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 829, + "img_width": 1024, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "48": { + "question_id": "48", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle 3$.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62", + "choices": [ + "28", + "38", + "52", + "62" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 426, + "img_width": 596, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "50": { + "question_id": "50", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what would likely happen if the number of large roach would decrease?\nChoices:\n(A) The population of steelheads would decrease.\n(B) The population of stickleback fry would increase.\n(C) The population of predatory insects would increase.\n(D) The population of predatory insects would decrease.", + "choices": [ + "The population of steelheads would decrease.", + "The population of stickleback fry would increase.", + "The population of predatory insects would increase.", + "The population of predatory insects would decrease." + ], + "answer": "The population of predatory insects would decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of steelheads would decrease.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 600, + "img_width": 633, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "52": { + "question_id": "52", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red metallic spheres. Subtract all big brown matte things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "54": { + "question_id": "54", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, the ratio of the length of line AB to the length of line AC is 2 : 5. If AC = 25, what is the length of line AB?\nChoices:\n(A) 8\n(B) 10\n(C) 15\n(D) 18\n(E) 20", + "choices": [ + "8", + "10", + "15", + "18", + "20" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "56": { + "question_id": "56", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 295, + "img_width": 202, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "58": { + "question_id": "58", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 760, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "60": { + "question_id": "60", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 381, + "img_width": 477, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "62": { + "question_id": "62", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cE\uff0cF\u5206\u522b\u662f\u83f1\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u7684\u4e2d\u70b9\uff0c\u4e14AB\uff1d5\uff0cAC\uff1d6\uff0e\u5219EF\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 6", + "choices": [ + "4", + "5", + "5.5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 138, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "64": { + "question_id": "64", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample A\n(C) sample B", + "choices": [ + "neither; the samples have the same temperature", + "sample A", + "sample B" + ], + "answer": "sample A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 405, + "img_width": 550, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "66": { + "question_id": "66", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "A", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 562, + "img_width": 320, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "68": { + "question_id": "68", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5c06\u4e00\u6839\u957f\u5ea6\u4e3a16cm\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53476cm\u81f3D\u70b9\uff08\u5982\u56fe\uff09\uff0c\u5219\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 8cm", + "choices": [ + "2cm", + "4cm", + "6cm", + "8cm" + ], + "answer": "4cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 252, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "70": { + "question_id": "70", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2600, + "img_width": 2266, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "72": { + "question_id": "72", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A real estate agent drove around the neighborhood and counted the number of houses on each block. How many blocks have exactly 36 houses? (Unit: blocks)", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "74": { + "question_id": "74", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the difference of largest and smallest bar?", + "choices": null, + "answer": "47.6", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "76": { + "question_id": "76", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to fish if pelicans increase?\nChoices:\n(A) decrease\n(B) nothing\n(C) increase\n(D) none of the above", + "choices": [ + "decrease", + "nothing", + "increase", + "none of the above" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "78": { + "question_id": "78", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the missing value.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 394, + "img_width": 1062, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "80": { + "question_id": "80", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what will happen if all the algae died due to pesticides?\nChoices:\n(A) Crabs and limpets will decrease\n(B) Dolphins will increase\n(C) Sea gulls will become extinct\n(D) Star fish will increase", + "choices": [ + "Crabs and limpets will decrease", + "Dolphins will increase", + "Sea gulls will become extinct", + "Star fish will increase" + ], + "answer": "Crabs and limpets will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Crabs and limpets will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 199, + "img_width": 372, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "82": { + "question_id": "82", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A square is inscribed in a circle of area 18$\\pi$ square units. Find the length of a side of the square.\nChoices:\n(A) 3\n(B) 3 \\sqrt 2\n(C) 6\n(D) 6 \\sqrt 2", + "choices": [ + "3", + "3 \\sqrt 2", + "6", + "6 \\sqrt 2" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 202, + "img_width": 200, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "84": { + "question_id": "84", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: ABCD is a square. Inscribed Circle center is O. Find the the angle of \u2220AMK. Return the numeric value.", + "choices": null, + "answer": "130.9", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1220, + "img_width": 1194, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "86": { + "question_id": "86", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the highest Acc score when Pretrain Loss is equal to 1.80?\nChoices:\n(A) ICL\n(B) SFT\n(C) SFT 1/8\n(D) RFT k=100\n(E) RFT k=25\n(F) RET k=6\n(G) RFT U13B", + "choices": [ + "ICL", + "SFT", + "SFT 1/8", + "RFT k=100", + "RFT k=25", + "RET k=6", + "RFT U13B" + ], + "answer": "RFT U13B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ICL", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 1046, + "img_width": 1734, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "88": { + "question_id": "88", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A square is tangent to a line at point P in the figure above. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 277, + "img_width": 442, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "90": { + "question_id": "90", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow matte blocks. Subtract all tiny brown cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "92": { + "question_id": "92", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the windows have a geometric shape that most houses have?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "94": { + "question_id": "94", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cD\u4e3a\u25b3ABC\u5185\u4e00\u70b9\uff0cCD\u5e73\u5206\u2220ACB\uff0cBD\u22a5CD\uff0c\u2220A\uff1d\u2220ABD\uff0c\u82e5\u2220DBC\uff1d54\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 44\u00b0\n(C) 27\u00b0\n(D) 54\u00b0", + "choices": [ + "36\u00b0", + "44\u00b0", + "27\u00b0", + "54\u00b0" + ], + "answer": "27\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 74, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "96": { + "question_id": "96", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: How many times Dissatisfied more than satisfied?", + "choices": null, + "answer": "3.9", + "extraction": "2.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 328, + "img_width": 186, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "98": { + "question_id": "98", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the value of the square in the figure.", + "choices": null, + "answer": "2", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning", + "algebraic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "100": { + "question_id": "100", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the ruling group?", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "102": { + "question_id": "102", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The shape is made of unit squares. What is the area of the shape?", + "choices": null, + "answer": "6", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 106, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "104": { + "question_id": "104", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?", + "choices": null, + "answer": "0.8", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "106": { + "question_id": "106", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 1?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "108": { + "question_id": "108", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Find out the average of the bottom two countries ??", + "choices": null, + "answer": "51.04", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "110": { + "question_id": "110", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of two lowest bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "112": { + "question_id": "112", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan airliners less than the number of gray shiny utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "114": { + "question_id": "114", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, KL is tangent to $\\odot M$ at K. Find the value of x.\nChoices:\n(A) 6.00\n(B) 9.45\n(C) 18.9\n(D) 37.8", + "choices": [ + "6.00", + "9.45", + "18.9", + "37.8" + ], + "answer": "9.45", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.00", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 273, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "116": { + "question_id": "116", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf has the most veins?\nChoices:\n(A) Acuminate\n(B) Truncate\n(C) Mucronate\n(D) Acute", + "choices": [ + "Acuminate", + "Truncate", + "Mucronate", + "Acute" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Acuminate", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 187, + "img_width": 350, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "118": { + "question_id": "118", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "120": { + "question_id": "120", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 320, + "img_width": 312, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "122": { + "question_id": "122", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow regular buss than small yellow metallic school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "124": { + "question_id": "124", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of leaf arrangement consists of at least three leaves attached to a node.\nChoices:\n(A) Whorled\n(B) Simple\n(C) Opposite\n(D) Alternate", + "choices": [ + "Whorled", + "Simple", + "Opposite", + "Alternate" + ], + "answer": "Whorled", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Whorled", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "126": { + "question_id": "126", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 800, + "img_width": 623, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "128": { + "question_id": "128", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large metal blocks. Subtract all yellow cylinders. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "130": { + "question_id": "130", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1403, + "img_width": 1063, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "132": { + "question_id": "132", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57284\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u5747\u4e3a1\uff0c\u70b9A\uff0cB\uff0cC\u90fd\u5728\u683c\u70b9\u4e0a\uff0cAD\u22a5BC\u4e8eD\uff0c\u5219AD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) \\frac{7}{3}", + "choices": [ + "1", + "1.5", + "2", + "\\frac{7}{3}" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "134": { + "question_id": "134", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People can use the engineering-design process to develop solutions to problems. One step in the process is testing if a potential solution meets the requirements of the design.\nThe passage below describes how the engineering-design process was used to test a solution to a problem. Read the passage. Then answer the question below.\n\nCooper was a landscape architect who was hired to design a new city park. The city council wanted the park to have space for outdoor concerts and to have at least 20% of the park shaded by trees. Cooper thought the concert area should be at least 150 meters from the road so traffic noise didn't interrupt the music. He developed three possible designs for the park with the concert area in a different location in each design. Then, he tested each design by measuring the distance between the road and the concert area.\nFigure: studying an architect's design. Which of the following could Cooper's test show?\nChoices:\n(A) if at least 20% of the park would be shaded by trees in each design\n(B) which design would have the least traffic noise in the concert area\n(C) which design would have the greatest distance between the concert area and the road", + "choices": [ + "if at least 20% of the park would be shaded by trees in each design", + "which design would have the least traffic noise in the concert area", + "which design would have the greatest distance between the concert area and the road" + ], + "answer": "which design would have the greatest distance between the concert area and the road", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "if at least 20% of the park would be shaded by trees in each design", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "high school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "136": { + "question_id": "136", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest value of blue bar?", + "choices": null, + "answer": "64", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 443, + "img_width": 415, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "138": { + "question_id": "138", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sandwich cut in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "140": { + "question_id": "140", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which food has the least carbs?\nChoices:\n(A) soup\n(B) water\n(C) sandwich\n(D) buns", + "choices": [ + "soup", + "water", + "sandwich", + "buns" + ], + "answer": "soup", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "soup", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "142": { + "question_id": "142", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it split in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 425, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "144": { + "question_id": "144", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Natalie buys 4.6 kilograms of turmeric. What is the total cost? (Unit: $)", + "choices": null, + "answer": "13.8", + "extraction": "18.4", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 162, + "img_width": 210, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "146": { + "question_id": "146", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kimberly's classmates revealed how many science articles they read. What is the range of the numbers?'", + "choices": null, + "answer": "4", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 286, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "148": { + "question_id": "148", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the smallest base?\nChoices:\n(A) Hastate\n(B) Cordate\n(C) Sagittate\n(D) Decurrent", + "choices": [ + "Hastate", + "Cordate", + "Sagittate", + "Decurrent" + ], + "answer": "Decurrent", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Hastate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 161, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "150": { + "question_id": "150", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are three points on \u2299O, and the straight line CD and \u2299O are tangent to point C. If \u2220DCB = 40.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 80\u00b0\n(D) 100\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "80\u00b0", + "100\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 144, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "152": { + "question_id": "152", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u5c06\u542b30\u00b0\u89d2\u7684\u76f4\u89d2\u4e09\u89d2\u677f\u6309\u5982\u56fe\u65b9\u5f0f\u653e\u7f6e\uff0c\u76f4\u89d2\u9876\u70b9\u5728l2\u4e0a\uff0c\u82e5\u22201\uff1d76\u00b0\uff0c\u5219\u22202\uff1d\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 45\u00b0\n(C) 44\u00b0\n(D) 64\u00b0", + "choices": [ + "36\u00b0", + "45\u00b0", + "44\u00b0", + "64\u00b0" + ], + "answer": "44\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 208, + "img_width": 229, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "154": { + "question_id": "154", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an odd function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "156": { + "question_id": "156", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the as x approaches 1 from the left side?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "158": { + "question_id": "158", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 685, + "img_width": 911, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "160": { + "question_id": "160", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 270, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "162": { + "question_id": "162", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The bird watcher counted the number of birds in each flock that passed overhead. How many flocks had at least 17 birds but fewer than 33 birds? (Unit: flocks)", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 202, + "img_width": 117, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "164": { + "question_id": "164", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, CE \u22a5 AB, point E is the foot of perpendicular, if \u2220D = 55.0, then \u2220BCE = ()\nChoices:\n(A) 55\u00b0\n(B) 35\u00b0\n(C) 25\u00b0\n(D) 30\u00b0", + "choices": [ + "55\u00b0", + "35\u00b0", + "25\u00b0", + "30\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 161, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "166": { + "question_id": "166", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Shape is missing?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "B", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 816, + "img_width": 2028, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "168": { + "question_id": "168", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the Hue-Saturation subspace shown in Fig. Q2 is a perfect circle and that colors A, B and C can be represented as the 3 points shown in the subspace. Which color has the smallest saturation coefficient?\nChoices:\n(A) (c)\n(B) (a)\n(C) (e)\n(D) (d)\n(E) (b)", + "choices": [ + "(c)", + "(a)", + "(e)", + "(d)", + "(b)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 454, + "img_width": 414, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "170": { + "question_id": "170", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: f(-1) is ____ f(0).\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "172": { + "question_id": "172", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Dark Salmon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 524, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "174": { + "question_id": "174", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny cyan suvs that are behind the aeroplane than cyan utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "176": { + "question_id": "176", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle.\nChoices:\n(A) 0.5\n(B) 1\n(C) 1.5\n(D) 2", + "choices": [ + "0.5", + "1", + "1.5", + "2" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 292, + "img_width": 305, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "178": { + "question_id": "178", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u5728\u2220FBD\u7684\u4e24\u6761\u8fb9BF\u3001BD\u4e0a\uff0cBE\u5e73\u5206\u2220FBD\uff0cCE\u5e73\u5206\u2220ACD\uff0c\u8fde\u63a5AE\uff0c\u82e5\u2220BEC\uff1d35\u00b0\uff0c\u5219\u2220FAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 45\u00b0\n(C) 55\u00b0\n(D) 65\u00b0", + "choices": [ + "35\u00b0", + "45\u00b0", + "55\u00b0", + "65\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 99, + "img_width": 129, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "180": { + "question_id": "180", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny brown cylinders. Subtract all tiny brown objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "182": { + "question_id": "182", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Yellow?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 589, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "184": { + "question_id": "184", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 0?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "186": { + "question_id": "186", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC \u22a5 CD, BD \u22a5 CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()\nChoices:\n(A) \\frac{20}{3}\n(B) \\frac{10}{3}\n(C) 7\n(D) \\frac{14}{3}", + "choices": [ + "\\frac{20}{3}", + "\\frac{10}{3}", + "7", + "\\frac{14}{3}" + ], + "answer": "\\frac{20}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{20}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "188": { + "question_id": "188", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many methods in the table achieve an A-847 score higher than 20.0?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 634, + "img_width": 2226, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "190": { + "question_id": "190", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 132, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "192": { + "question_id": "192", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the diameter CD of \u2299O crosses the midpoint G of chord EF, \u2220DCF = 20.0, then \u2220EOD is equal to ()\nChoices:\n(A) 10\u00b0\n(B) 20\u00b0\n(C) 40\u00b0\n(D) 80\u00b0", + "choices": [ + "10\u00b0", + "20\u00b0", + "40\u00b0", + "80\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 127, + "img_width": 101, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "194": { + "question_id": "194", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: On average, how many people can commute on this vehicle?", + "choices": null, + "answer": "50", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 408, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "196": { + "question_id": "196", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u6240\u793a\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u5df2\u77e5\u70b9D\uff0cE\uff0cF\u5206\u522b\u4e3a\u8fb9BC\uff0cAD\uff0cCE\u7684\u4e2d\u70b9\uff0c\u4e14S\u25b3ABC\uff1d4cm2\uff0c\u5219S\u25b3DEF\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 2cm2\n(B) 1cm2\n(C) 0.5cm2\n(D) 0.25cm2", + "choices": [ + "2cm2", + "1cm2", + "0.5cm2", + "0.25cm2" + ], + "answer": "0.5cm2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "198": { + "question_id": "198", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the missing value.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 756, + "img_width": 890, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "200": { + "question_id": "200", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 404, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "202": { + "question_id": "202", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "204": { + "question_id": "204", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0627\u0632 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u062a\u0635\u0648\u06cc\u0631 \u062f\u0631\u0628 \u062f\u0648\u0645 \u0686\u0646\u062f \u0634\u06cc\u0634\u0647 \u0628\u062f\u0648\u0646 \u0631\u0646\u06af \u062f\u0627\u0631\u0647\u061f", + "choices": null, + "answer": "12", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 376, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "206": { + "question_id": "206", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the scale factor from $Q$ to $Q'$.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 611, + "img_width": 731, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "208": { + "question_id": "208", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 195, + "img_width": 300, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "210": { + "question_id": "210", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 370, + "img_width": 493, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "212": { + "question_id": "212", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 403, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "214": { + "question_id": "214", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of amount earned from merchandise imports in Canada greater than the average percentage of amount earned from merchandise imports in Canada taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1109, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "216": { + "question_id": "216", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people like the most preferred object in the whole chart?", + "choices": null, + "answer": "90", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "218": { + "question_id": "218", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large red rubber blocks. Subtract all tiny red matte objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "220": { + "question_id": "220", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2299O is the circumscribed circle of the quadrilateral ABCD, if \u2220O = 110.0, then the degree of \u2220C is ()\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 105\u00b0\n(D) 90\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "105\u00b0", + "90\u00b0" + ], + "answer": "125\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "222": { + "question_id": "222", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue shiny spheres. Subtract all big blue shiny cubes. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "224": { + "question_id": "224", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "226": { + "question_id": "226", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "228": { + "question_id": "228", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If \u2220ACE = 25.0, then the degree of \u2220D is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 137, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "230": { + "question_id": "230", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy higher than 9 in at least one dataset?", + "choices": null, + "answer": "0", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "232": { + "question_id": "232", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each pink ball represents one particle of solute. Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A", + "choices": [ + "neither; their concentrations are the same", + "Solution B", + "Solution A" + ], + "answer": "Solution B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "234": { + "question_id": "234", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown above, AC = 6. What is the length of segment AB?\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 7\n(E) It cannot be determined from the information given", + "choices": [ + "3", + "5", + "6", + "7", + "It cannot be determined from the information given" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 378, + "img_width": 434, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "236": { + "question_id": "236", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $z$.\nChoices:\n(A) 7\n(B) 9\n(C) 12\n(D) 15", + "choices": [ + "7", + "9", + "12", + "15" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 423, + "img_width": 447, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "238": { + "question_id": "238", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find PT\nChoices:\n(A) 6\n(B) \\frac { 20 } { 3 }\n(C) 7\n(D) 22 / 3", + "choices": [ + "6", + "\\frac { 20 } { 3 }", + "7", + "22 / 3" + ], + "answer": "\\frac { 20 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "240": { + "question_id": "240", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2387, + "img_width": 3500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "242": { + "question_id": "242", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle A$ of quadrilateral ABCD\nChoices:\n(A) 45\n(B) 90\n(C) 135\n(D) 180", + "choices": [ + "45", + "90", + "135", + "180" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 381, + "img_width": 621, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "244": { + "question_id": "244", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 500, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "246": { + "question_id": "246", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. In 1995, the Intergovernmental Panel on Climate Change (IPCC) considered a global average temperature rise of $1.0-3.5^{\\circ} \\mathrm{C}$ likely by the year 2100 , with $2.0^{\\circ} \\mathrm{C}$ its best estimate. Because water vapour is itself a greenhouse gas, the increase in water vapour content of the atmosphere is of some concern to climate change experts. Predict the relative increase in water vapour in the atmosphere based on a temperature rises of $2.0 \\mathrm{~K}$, assuming that the relative humidity remains constant. (The present global mean temperature is $290 \\mathrm{~K}$, and the equilibrium vapour pressure of water at that temperature is 0.0189 bar.)", + "choices": null, + "answer": "13", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 216, + "img_width": 1098, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "248": { + "question_id": "248", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green matte choppers greater than the number of large yellow shiny motorbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "250": { + "question_id": "250", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area $A$ of the shaded region is given. Find $x$. $A = 66$ cm$^2$ .\nChoices:\n(A) 4.6\n(B) 6.5\n(C) 13.0\n(D) 26.0", + "choices": [ + "4.6", + "6.5", + "13.0", + "26.0" + ], + "answer": "13.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 286, + "img_width": 303, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "252": { + "question_id": "252", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Consider the infinitely long chain of resistors shown below. What is the resistance between terminals a and b if R=1?", + "choices": null, + "answer": "0.73", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 169, + "img_width": 463, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "254": { + "question_id": "254", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big objects that are in front of the metal fighter less than the number of things that are behind the big metallic bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "256": { + "question_id": "256", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAD\u5e73\u5206\u2220BAC\uff0cAD\u4ea4BC\u4e8e\u70b9D\uff0cDE\u22a5AB\uff0c\u5782\u8db3\u4e3aE\uff0c\u82e5DE\uff1d3\uff0cAC\uff1d4\uff0c\u5219\u25b3ADC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "258": { + "question_id": "258", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An employee at the craft store counted the number of red buttons in each bag of mixed buttons. How many bags had at least 60 red buttons but fewer than 81 red buttons?'", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 156, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "260": { + "question_id": "260", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the derivative of the function positive between [1, 2] assuming that it's differentiable?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 368, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "262": { + "question_id": "262", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between genres of tv shows watched by highest female and lowest female?", + "choices": null, + "answer": "39", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 756, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "264": { + "question_id": "264", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For Group C, in which week is the cumulative increase in weight , the highest?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "266": { + "question_id": "266", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has the most uneven shape?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) truncate", + "choices": [ + "oblique", + "obtuse", + "cordate", + "truncate" + ], + "answer": "oblique", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "oblique", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 240, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "268": { + "question_id": "268", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Colton wants to buy 1+3/10 kilograms of English muffins. How much will he spend? (Unit: $)", + "choices": null, + "answer": "10.4", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "270": { + "question_id": "270", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A and B are three points on \u2299O and AB = AC. Connect BO and CO, if \u2220ABC = 65.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 50\u00b0\n(B) 65\u00b0\n(C) 100\u00b0\n(D) 130\u00b0", + "choices": [ + "50\u00b0", + "65\u00b0", + "100\u00b0", + "130\u00b0" + ], + "answer": "100\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "272": { + "question_id": "272", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:30\n(B) 1:30\n(C) 4:30\n(D) 5:30\n(E) 11:30", + "choices": [ + "9:30", + "1:30", + "4:30", + "5:30", + "11:30" + ], + "answer": "4:30", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9:30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 261, + "img_width": 261, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "274": { + "question_id": "274", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u3001BC\u3001CD\u3001DA\u90fd\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5df2\u77e5AD\uff1d2\uff0cBC\uff1d5\uff0c\u5219AB+CD\u7684\u503c\u662f\uff08\uff09\nChoices:\n(A) 14\n(B) 12\n(C) 9\n(D) 7", + "choices": [ + "14", + "12", + "9", + "7" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "276": { + "question_id": "276", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that the radius of \u2299O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()\nChoices:\n(A) 1mm\n(B) 2mm\n(C) 3mm\n(D) 4mm", + "choices": [ + "1mm", + "2mm", + "3mm", + "4mm" + ], + "answer": "3mm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1mm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 102, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "278": { + "question_id": "278", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following objects, which one has the best PSNR score?\nChoices:\n(A) Lego\n(B) Mats\n(C) Mic\n(D) Ship", + "choices": [ + "Lego", + "Mats", + "Mic", + "Ship" + ], + "answer": "Mic", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lego", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 940, + "img_width": 1478, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "280": { + "question_id": "280", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, ABCDEF is a regular hexagon, and its center is point O. What is the value of x?\nChoices:\n(A) 80\n(B) 60\n(C) 40\n(D) 30\n(E) 20", + "choices": [ + "80", + "60", + "40", + "30", + "20" + ], + "answer": "60", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "282": { + "question_id": "282", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of the sun is showing?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "284": { + "question_id": "284", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "286": { + "question_id": "286", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5c06\u4e00\u6839\u957f\u5ea6\u4e3a8cm\uff0c\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u76ae\u7b4b\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53473cm\u5230\u70b9D\uff0c\u5219\u6b64\u65f6\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 6cm\n(B) 5cm\n(C) 4cm\n(D) 2cm", + "choices": [ + "6cm", + "5cm", + "4cm", + "2cm" + ], + "answer": "2cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "288": { + "question_id": "288", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following value ranges of \u03bb2 does the percentage of Attack Effectiveness begin to be lower than that of Diversity?\nChoices:\n(A) 0.0 - 0.2\n(B) 0.2 - 0.4\n(C) 0.4 - 0.6\n(D) 0.6 - 0.8\n(E) 0.8 - 1.0", + "choices": [ + "0.0 - 0.2", + "0.2 - 0.4", + "0.4 - 0.6", + "0.6 - 0.8", + "0.8 - 1.0" + ], + "answer": "0.0 - 0.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.0 - 0.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 606, + "img_width": 2144, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "290": { + "question_id": "290", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5e73\u884c\u7ebfAB\uff0cCD\u88ab\u76f4\u7ebfAE\u6240\u622a\uff0e\u82e5\u22201\uff1d105\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 75\u00b0\n(B) 85\u00b0\n(C) 95\u00b0\n(D) 105\u00b0", + "choices": [ + "75\u00b0", + "85\u00b0", + "95\u00b0", + "105\u00b0" + ], + "answer": "75\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 132, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "292": { + "question_id": "292", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rebecca Purple greater than Olive Drab?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 461, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "294": { + "question_id": "294", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: In Fig. 21-25, the particles have charges $q_1=-q_2=100 \\mathrm{nC}$ and $q_3=-q_4=200 \\mathrm{nC}$, and distance $a=$ $5.0 \\mathrm{~cm}$. What is the $x$ component of the net electrostatic force on particle 3?", + "choices": null, + "answer": "0.17", + "extraction": "-0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 293, + "img_width": 247, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "296": { + "question_id": "296", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of f(-3) is ____ the value of f(2)\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "298": { + "question_id": "298", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A decrease in rabbits would affect whose food source?\nChoices:\n(A) mountain lion\n(B) producer\n(C) decomposer\n(D) energy", + "choices": [ + "mountain lion", + "producer", + "decomposer", + "energy" + ], + "answer": "mountain lion", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mountain lion", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 699, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "300": { + "question_id": "300", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{HK}$ and $\\overline{IG}$ are diameters of $\\odot L$. Find $m \\widehat {IHJ}$.\nChoices:\n(A) 59\n(B) 135\n(C) 270\n(D) 301", + "choices": [ + "59", + "135", + "270", + "301" + ], + "answer": "270", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "59", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 492, + "img_width": 510, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "302": { + "question_id": "302", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green curve?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a logarithmic function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 300, + "img_width": 531, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "304": { + "question_id": "304", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In the figure above, two line segments meet at a point on line l. If the value of y is equal to the square of the value of x, what is the value of y?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 431, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "306": { + "question_id": "306", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the bed much larger than the kitten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "308": { + "question_id": "308", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "310": { + "question_id": "310", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z\nChoices:\n(A) 10\n(B) \\frac { 32 } { 3 }\n(C) \\frac { 40 } { 3 }\n(D) \\frac { 50 } { 3 }", + "choices": [ + "10", + "\\frac { 32 } { 3 }", + "\\frac { 40 } { 3 }", + "\\frac { 50 } { 3 }" + ], + "answer": "\\frac { 40 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 218, + "img_width": 350, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "312": { + "question_id": "312", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2003, which crop cost the most per cwt?'\nChoices:\n(A) potatoes\n(B) peas\n(C) apples\n(D) canola", + "choices": [ + "potatoes", + "peas", + "apples", + "canola" + ], + "answer": "apples", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "potatoes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 204, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "314": { + "question_id": "314", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 522, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "316": { + "question_id": "316", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, given that points A, B, and C are on \u2299O, \u2220AOB = 100.0, then the degree of \u2220ACB is ()\nChoices:\n(A) 50\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 200\u00b0", + "choices": [ + "50\u00b0", + "80\u00b0", + "100\u00b0", + "200\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 105, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "318": { + "question_id": "318", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the figure. Round to the nearest tenth if necessary.\nChoices:\n(A) 191.5\n(B) 1128\n(C) 2256\n(D) 4512", + "choices": [ + "191.5", + "1128", + "2256", + "4512" + ], + "answer": "2256", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "191.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 175, + "img_width": 239, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "320": { + "question_id": "320", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0cAB\uff1d13\uff0cAC\uff1d5\uff0cD\u3001E\u5206\u522b\u662fAC\u3001AB\u7684\u4e2d\u70b9\uff0c\u5219DE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 6.5\n(B) 6\n(C) 5.5\n(D) \\frac{\u221a{119}}{2}", + "choices": [ + "6.5", + "6", + "5.5", + "\\frac{\u221a{119}}{2}" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 90, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "322": { + "question_id": "322", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cA\uff0cB\u4e24\u70b9\u88ab\u6c60\u5858\u9694\u5f00\uff0c\u5728AB\u5916\u9009\u4e00\u70b9C\uff0c\u4f7f\u70b9C\u80fd\u76f4\u63a5\u5230\u8fbe\u70b9A\u548c\u70b9B\uff0c\u8fde\u63a5AC\u548cBC\uff0c\u5e76\u5206\u522b\u627e\u51faAC\u548cBC\u7684\u4e2d\u70b9M\uff0cN\uff0e\u5982\u679c\u6d4b\u5f97MN\uff1d20m\uff0c\u90a3\u4e48A\uff0cB\u4e24\u70b9\u7684\u8ddd\u79bb\u662f\uff08\uff09\nChoices:\n(A) 10m\n(B) 20m\n(C) 35m\n(D) 40m", + "choices": [ + "10m", + "20m", + "35m", + "40m" + ], + "answer": "40m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "324": { + "question_id": "324", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between highest and lowest value of dark blue bar?", + "choices": null, + "answer": "53", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 726, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "326": { + "question_id": "326", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 170, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "328": { + "question_id": "328", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm candy for all the datasets?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "330": { + "question_id": "330", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny cubes. Subtract all brown balls. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "332": { + "question_id": "332", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A taxi cab driver tracked how many miles he drove each month. How many miles did the taxi cab driver drive in total in January and April? (Unit: miles)", + "choices": null, + "answer": "7873", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 125, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "334": { + "question_id": "334", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metal tandem bikes in front of the small yellow metallic bicycle than metal bicycles on the left side of the large brown jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "336": { + "question_id": "336", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "338": { + "question_id": "338", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC above, AB = AC, E is the midpoint of line AB, and D is the midpoint of line AC. If AE = x and ED = 4, what is length BC?\nChoices:\n(A) 6\n(B) 8\n(C) 2*x\n(D) 4*x\n(E) 4*x^2", + "choices": [ + "6", + "8", + "2*x", + "4*x", + "4*x^2" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 167, + "img_width": 121, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "340": { + "question_id": "340", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following domains has the most number of BPE Tokens?\nChoices:\n(A) Legal \n(B) Code \n(C) Conversational \n(D) Math \n(E) Science\n(F) Books \n(G) News \n(H) Encyclopedic", + "choices": [ + "Legal ", + "Code ", + "Conversational ", + "Math ", + "Science", + "Books ", + "News ", + "Encyclopedic" + ], + "answer": "Science", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Legal ", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1176, + "img_width": 2142, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "342": { + "question_id": "342", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, which of the following is the greatest?\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d\n(E) e", + "choices": [ + "a", + "b", + "c", + "d", + "e" + ], + "answer": "d", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 299, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "344": { + "question_id": "344", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal cars that are left of the tiny matte school bus greater than the number of tiny cyan double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "346": { + "question_id": "346", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the y-intercept of this function?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 339, + "img_width": 341, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "348": { + "question_id": "348", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the pieces in triangle cuts?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "350": { + "question_id": "350", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 89, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "352": { + "question_id": "352", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people will fit in the smaller vehicle?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "354": { + "question_id": "354", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 90?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "356": { + "question_id": "356", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big motorbikes than rubber choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "358": { + "question_id": "358", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is the same as the unfolded cube?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 517, + "img_width": 326, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "360": { + "question_id": "360", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $\\frac{I J}{X J}=\\frac{HJ}{YJ}, m \\angle W X J=130$\r\nand $m \\angle WZG=20,$ find $m \\angle YIZ$\nChoices:\n(A) 40\n(B) 50\n(C) 65\n(D) 110", + "choices": [ + "40", + "50", + "65", + "110" + ], + "answer": "50", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 370, + "img_width": 721, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "362": { + "question_id": "362", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan cylinders. Subtract all tiny purple rubber objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "364": { + "question_id": "364", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and points C and D are on \u2299O. If \u2220ABD = 50.0, then the degree of \u2220BCD is ()\nChoices:\n(A) 30\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "30\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "366": { + "question_id": "366", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 320, + "img_width": 250, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "368": { + "question_id": "368", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow matte school buss greater than the number of big yellow metal cars?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "370": { + "question_id": "370", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, if the number of ferns decrease, the supply of salmon will most likely?\nChoices:\n(A) decrease\n(B) can't tell\n(C) stay same\n(D) increase", + "choices": [ + "decrease", + "can't tell", + "stay same", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 680, + "img_width": 880, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "372": { + "question_id": "372", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small gray spheres. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "374": { + "question_id": "374", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms calf and ivory?", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "376": { + "question_id": "376", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte cubes. Subtract all tiny gray metal cubes. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "378": { + "question_id": "378", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAD\u662f\u25b3ABC\u7684\u4e2d\u7ebf\uff0cE\u4e3aAD\u7684\u4e2d\u70b9\uff0c\u25b3ABE\u7684\u9762\u79ef\u4e3a2\uff0c\u5219\u25b3ABC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8", + "choices": [ + "5", + "6", + "7", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 118, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "380": { + "question_id": "380", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For how many years that the percentage value over 4?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "382": { + "question_id": "382", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the building through the window at least five stories tall?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 400, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "384": { + "question_id": "384", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 495, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "386": { + "question_id": "386", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 5\n(B) 10\n(C) 10 \\sqrt { 3 }\n(D) 20", + "choices": [ + "5", + "10", + "10 \\sqrt { 3 }", + "20" + ], + "answer": "10 \\sqrt { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 164, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "388": { + "question_id": "388", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Express the ratio of $\\tan M$ as a decimal to the nearest hundredth.\nChoices:\n(A) 0.38\n(B) 0.42\n(C) 0.92\n(D) 2.40", + "choices": [ + "0.38", + "0.42", + "0.92", + "2.40" + ], + "answer": "0.42", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.38", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 209, + "img_width": 342, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "390": { + "question_id": "390", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer jets that are left of the small brown suv than objects right of the big shiny car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "392": { + "question_id": "392", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mr. Huffman, a P.E. teacher, wrote down how much weight each of his students could lift. How many people lifted at least 46 pounds? (Unit: people)", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "394": { + "question_id": "394", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following environments has the least GPU days for training?\nChoices:\n(A) HomeGrid\n(B) Msgr S1\n(C) Msgr S2\n(D) Msgr S3\n(E) VLN\n(F) LangRoom", + "choices": [ + "HomeGrid", + "Msgr S1", + "Msgr S2", + "Msgr S3", + "VLN", + "LangRoom" + ], + "answer": "LangRoom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "HomeGrid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 858, + "img_width": 1854, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "396": { + "question_id": "396", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the algae dies then water flea population will\nChoices:\n(A) remains the same\n(B) decrease\n(C) increase\n(D) NA", + "choices": [ + "remains the same", + "decrease", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 576, + "img_width": 720, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "398": { + "question_id": "398", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 942, + "img_width": 727, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "400": { + "question_id": "400", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which Episode ID does the Retroformer attain its peak Success rate (%)?\nChoices:\n(A) 1.0\n(B) 1.5\n(C) 2.0\n(D) 2.5\n(E) 3.0\n(F) 3.5\n(G) 4.0", + "choices": [ + "1.0", + "1.5", + "2.0", + "2.5", + "3.0", + "3.5", + "4.0" + ], + "answer": "4.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 942, + "img_width": 1196, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "402": { + "question_id": "402", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food chain diagram below, which animal would most directly lack food if Grasshoppers get exterminated?\nChoices:\n(A) Rabbit\n(B) Deer\n(C) Frogs\n(D) Wolf", + "choices": [ + "Rabbit", + "Deer", + "Frogs", + "Wolf" + ], + "answer": "Frogs", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rabbit", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 735, + "img_width": 909, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "404": { + "question_id": "404", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which activity begins at 11.50 A.M.?'\nChoices:\n(A) figure skating practice\n(B) private class\n(C) adult class\n(D) children's class", + "choices": [ + "figure skating practice", + "private class", + "adult class", + "children's class" + ], + "answer": "children's class", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "figure skating practice", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 217, + "img_width": 325, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "406": { + "question_id": "406", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many snowmen are there?", + "choices": null, + "answer": "15", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 183, + "img_width": 714, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "408": { + "question_id": "408", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 6\n(B) 6 \\sqrt { 2 }\n(C) 6 \\sqrt { 3 }\n(D) 6 \\sqrt { 5 }", + "choices": [ + "6", + "6 \\sqrt { 2 }", + "6 \\sqrt { 3 }", + "6 \\sqrt { 5 }" + ], + "answer": "6 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 238, + "img_width": 362, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "410": { + "question_id": "410", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of $\\triangle D E F,$ if $\\triangle D E F \\sim \\triangle C B F,$ perimeter of $\\triangle C B F=27, D F=6,$ and $F C=8$\nChoices:\n(A) 20.25\n(B) 21\n(C) 27\n(D) 36", + "choices": [ + "20.25", + "21", + "27", + "36" + ], + "answer": "20.25", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20.25", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 226, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "412": { + "question_id": "412", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tanner has $35. Does he have enough to buy a black jacket and a pair of shorts?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 192, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "414": { + "question_id": "414", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $ST=8, TR=4$, and $PT=6$, find $QR$.\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 10", + "choices": [ + "6", + "8", + "9", + "10" + ], + "answer": "9", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 386, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "416": { + "question_id": "416", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest volume written on the blender?", + "choices": null, + "answer": "800", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "418": { + "question_id": "418", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the number of grasshoppers decreases, what will the population of spiders most likely do?\nChoices:\n(A) remain the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remain the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "420": { + "question_id": "420", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the lowest value on the Y axis?", + "choices": null, + "answer": "0.0", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 1763, + "img_width": 2256, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "422": { + "question_id": "422", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "10", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "424": { + "question_id": "424", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the food half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "426": { + "question_id": "426", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u82e5DE\u662f\u25b3ABC\u7684\u4e2d\u4f4d\u7ebf\uff0c\u25b3ADE\u7684\u5468\u957f\u4e3a1\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "428": { + "question_id": "428", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "28", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 968, + "img_width": 1259, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "430": { + "question_id": "430", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=0 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "432": { + "question_id": "432", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of undernourished male children greater than 0.4 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1085, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "434": { + "question_id": "434", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, side AC of triangle ABC is on line l. What is x in terms of k?\nChoices:\n(A) 60-k\n(B) k\n(C) 60+k\n(D) 120-k\n(E) 120-2*k", + "choices": [ + "60-k", + "k", + "60+k", + "120-k", + "120-2*k" + ], + "answer": "60-k", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60-k", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 157, + "img_width": 215, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "436": { + "question_id": "436", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "438": { + "question_id": "438", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 367, + "img_width": 329, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "440": { + "question_id": "440", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the white plate half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "442": { + "question_id": "442", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "444": { + "question_id": "444", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the two genders?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "446": { + "question_id": "446", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u662f\u25b3ABC\u7684\u5185\u5fc3\uff0c\u8fde\u63a5DB\uff0cDC\uff0c\u8fc7\u70b9D\u4f5cEF\u2225BC\u5206\u522b\u4ea4AB\u3001AC\u4e8e\u70b9E\u3001F\uff0c\u82e5BE+CF\uff1d8\uff0c\u5219EF\u7684\u957f\u5ea6\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 8\n(D) 16", + "choices": [ + "4", + "5", + "8", + "16" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "448": { + "question_id": "448", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year recorded the highest share of Urban secondary schools with access to electricity in India?", + "choices": null, + "answer": "2016", + "extraction": "2012", + "prediction": "2012", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "450": { + "question_id": "450", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the grass died, what would be most affected?\nChoices:\n(A) garter snakes\n(B) hognose snakes\n(C) hawks\n(D) grasshoppers", + "choices": [ + "garter snakes", + "hognose snakes", + "hawks", + "grasshoppers" + ], + "answer": "grasshoppers", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "garter snakes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "452": { + "question_id": "452", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most likely equilibrium population count?\nChoices:\n(A) 40\n(B) 60\n(C) 80\n(D) 100", + "choices": [ + "40", + "60", + "80", + "100" + ], + "answer": "80", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 366, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "454": { + "question_id": "454", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "456": { + "question_id": "456", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "458": { + "question_id": "458", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: If you add the two visible numbers, on the jerseys, what is the total sum?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "460": { + "question_id": "460", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were fewer leaves in this ecosystem, the first organism to experience change as a result would be:\nChoices:\n(A) Frogs\n(B) Crickets\n(C) Snakes\n(D) Hawks", + "choices": [ + "Frogs", + "Crickets", + "Snakes", + "Hawks" + ], + "answer": "Crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Frogs", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "462": { + "question_id": "462", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 100?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "464": { + "question_id": "464", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1316, + "img_width": 1000, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "466": { + "question_id": "466", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Periwinkle intersect Yellow Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 487, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "468": { + "question_id": "468", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "470": { + "question_id": "470", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following models has the lowest KS Rollout Loss overall?\nChoices:\n(A) Baseline\n(B) Diffusion\n(C) PDE-Refiner\n(D) Pushforward", + "choices": [ + "Baseline", + "Diffusion", + "PDE-Refiner", + "Pushforward" + ], + "answer": "PDE-Refiner", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Baseline", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 854, + "img_width": 1422, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "472": { + "question_id": "472", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "474": { + "question_id": "474", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "476": { + "question_id": "476", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If frogs were removed from this environment what animal would potentially see an increase in its population?\nChoices:\n(A) crickets\n(B) deer\n(C) snakes\n(D) hawks", + "choices": [ + "crickets", + "deer", + "snakes", + "hawks" + ], + "answer": "crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "crickets", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 518, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "478": { + "question_id": "478", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 5", + "choices": [ + "10", + "8", + "6", + "5" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "480": { + "question_id": "480", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny blue metal bicycles behind the small sedan less than the number of purple fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "482": { + "question_id": "482", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, triangle ABC is inscribed in the circle with center O and diameter AC. If AB = AO, what is the degree measure of angle ABO?\nChoices:\n(A) 15*\\degree\n(B) 30*\\degree\n(C) 45*\\degree\n(D) 60*\\degree\n(E) 90*\\degree", + "choices": [ + "15*\\degree", + "30*\\degree", + "45*\\degree", + "60*\\degree", + "90*\\degree" + ], + "answer": "60*\\degree", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15*\\degree", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 134, + "img_width": 143, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "484": { + "question_id": "484", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "486": { + "question_id": "486", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1d5\uff0cAD\uff1d7\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 12\n(B) 14\n(C) 35\n(D) 24", + "choices": [ + "12", + "14", + "35", + "24" + ], + "answer": "24", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "488": { + "question_id": "488", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown things. Subtract all tiny blue metallic objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "490": { + "question_id": "490", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u3001B\u5728\u540c\u4e00\u76f4\u7ebf\u4e0a\uff0cDC\u22a5EC\uff0c\u82e5\u2220BCD\uff1d40\u00b0\uff0c\u5219\u2220ACE\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 88, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "492": { + "question_id": "492", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the \u2299O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. Connect AD, given that \u2220DAC = 30.0, the length of the line segment CD is ()\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "1", + "\u221a{3}", + "2", + "2\u221a{3}" + ], + "answer": "2\u221a{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 158, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "494": { + "question_id": "494", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "496": { + "question_id": "496", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "498": { + "question_id": "498", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the water half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 478, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "500": { + "question_id": "500", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1236, + "img_width": 987, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "502": { + "question_id": "502", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tandem bikes that are behind the brown metal bicycle than matte trucks on the left side of the green object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "504": { + "question_id": "504", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, D and E are the points on the edges AB and AC of \u25b3ABC, DE \u2225 BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4", + "choices": [ + "10", + "8", + "6", + "4" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "506": { + "question_id": "506", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?", + "choices": null, + "answer": "[2014, 2016]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "508": { + "question_id": "508", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The owner of a bed and breakfast inn recalled how many guests the inn had hosted each day. What is the median of the numbers?'", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "510": { + "question_id": "510", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{5}", + "choices": [ + "\\frac{2}{3}", + "\\frac{3}{5}", + "\\frac{3}{4}", + "\\frac{4}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 186, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "512": { + "question_id": "512", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the y coordinate of the center of mass of the isosceles right triangle of uniform areal density shown in Figure 9-C?", + "choices": null, + "answer": "0.24", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 356, + "img_width": 497, + "language": "english", + "skills": [ + "geometry reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "514": { + "question_id": "514", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you wanted the leaf with the least main veins, which would you choose?\nChoices:\n(A) 3 main veins\n(B) pinnate\n(C) reticulate\n(D) palmate", + "choices": [ + "3 main veins", + "pinnate", + "reticulate", + "palmate" + ], + "answer": "3 main veins", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3 main veins", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 236, + "img_width": 559, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "516": { + "question_id": "516", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most the stepping stones square?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 339, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "518": { + "question_id": "518", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2211, + "img_width": 2838, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "520": { + "question_id": "520", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 741, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "522": { + "question_id": "522", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 86, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "524": { + "question_id": "524", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The Kingwood Ski Resort asked its guests how many times they went sledding last winter. How many guests went sledding more than 2 times?'", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 163, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "526": { + "question_id": "526", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn", + "choices": [ + "slide", + "flip", + "turn" + ], + "answer": "slide", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "slide", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 104, + "img_width": 253, + "language": "english", + "skills": [ + "geometry reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "528": { + "question_id": "528", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBD\u22a5CF\uff0c\u5782\u8db3\u4e3aB\uff0c\u2220ABF\uff1d35\u00b0\uff0c\u5219\u2220BDC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 45\u00b0\n(D) 55\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "45\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 135, + "img_width": 194, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "530": { + "question_id": "530", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The advertising agency counted the number of billboards in each city in the state. How many cities have fewer than 70 billboards? (Unit: cities)", + "choices": null, + "answer": "9", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 140, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "532": { + "question_id": "532", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer gray trucks that are in front of the large aeroplane than big yellow metal objects in front of the purple object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "534": { + "question_id": "534", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of stunted female children greater than the average percentage of stunted female children taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 883, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "536": { + "question_id": "536", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are on \u2299O, if \u2220C = 35.0, then \u2220AOB = ()\nChoices:\n(A) 17.5\u00b0\n(B) 35\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "17.5\u00b0", + "35\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "17.5\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "538": { + "question_id": "538", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. If AB = 6.0, the area of \u200b\u200bthe ring is ()\nChoices:\n(A) 9\u03c0\n(B) 6\u03c0\n(C) 3\u03c0\n(D) \u03c0", + "choices": [ + "9\u03c0", + "6\u03c0", + "3\u03c0", + "\u03c0" + ], + "answer": "9\u03c0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9\u03c0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "540": { + "question_id": "540", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5", + "choices": [ + "3/11", + "8/11", + "6/11", + "3/5" + ], + "answer": "3/11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3/11", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 103, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "542": { + "question_id": "542", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the figure achieve an Acc score greater than 60?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scatter plot", + "grade": "college", + "img_height": 1358, + "img_width": 1690, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "544": { + "question_id": "544", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total percentage of people who say that they do either less or more often than the usual amount of exercise during the coronavirus pandemic in the United States as of April 2020?", + "choices": null, + "answer": "44", + "extraction": "77", + "prediction": "77", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "546": { + "question_id": "546", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the overall ratio of male to female?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "548": { + "question_id": "548", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer cyan jets than big buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "550": { + "question_id": "550", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with highest accuracy?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "552": { + "question_id": "552", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many queries have a p-value lower than 0.50?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 330, + "img_width": 1726, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "554": { + "question_id": "554", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 488, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "556": { + "question_id": "556", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large red metallic things that are on the left side of the cyan shiny scooter than things that are in front of the small jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "558": { + "question_id": "558", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "560": { + "question_id": "560", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "562": { + "question_id": "562", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green cubes. Subtract all large cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "564": { + "question_id": "564", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest time required to import ?", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1056, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "566": { + "question_id": "566", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u25b3ABC\u224c\u25b3DEF\uff0cCD\u5e73\u5206\u2220BCA\uff0c\u82e5\u2220A\uff1d22\u00b0\uff0c\u2220CGF\uff1d88\u00b0\uff0c\u5219\u2220E\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 26\u00b0\n(B) 28\u00b0\n(C) 30\u00b0\n(D) 34\u00b0", + "choices": [ + "26\u00b0", + "28\u00b0", + "30\u00b0", + "34\u00b0" + ], + "answer": "26\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 89, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "568": { + "question_id": "568", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an economics project, Colleen determined the cost of ferry rides for bicycles and cars. How much higher is the fare for a car on the Mukilteu-Clinton ferry than on the Southport-Fort Fisher ferry? (Unit: $)", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 349, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "570": { + "question_id": "570", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte blocks. Subtract all brown things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "572": { + "question_id": "572", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function start decreasing?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 316, + "img_width": 400, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "574": { + "question_id": "574", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do you see the figures inside these boxes? They form a pattern. Choose the figure in the answer row below that continues the pattern.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5", + "choices": [ + "1", + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 378, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "576": { + "question_id": "576", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human brain is the largest and most anterior part of each cerebral hemisphere?\nChoices:\n(A) motor cortex\n(B) occipital lobe\n(C) temporal lobe\n(D) frontal lobe", + "choices": [ + "motor cortex", + "occipital lobe", + "temporal lobe", + "frontal lobe" + ], + "answer": "frontal lobe", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "motor cortex", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 625, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "578": { + "question_id": "578", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9567", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "580": { + "question_id": "580", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 650, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "582": { + "question_id": "582", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Rebecca Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "584": { + "question_id": "584", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A philanthropic organization compared the amounts of money that its members donated to certain causes. Who donated more money to arts education, Aubrey or Connor?'\nChoices:\n(A) Connor\n(B) Aubrey", + "choices": [ + "Connor", + "Aubrey" + ], + "answer": "Connor", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Connor", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 391, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "586": { + "question_id": "586", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, rotate \u25b3ABC clockwise around point A by 90.0 to obtain \u25b3AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. If \u2220CC\u2032B\u2032 = 32.0, then the size of \u2220AC\u2032B\u2032 is ()\nChoices:\n(A) 32\u00b0\n(B) 45\u00b0\n(C) 13\u00b0\n(D) 30\u00b0", + "choices": [ + "32\u00b0", + "45\u00b0", + "13\u00b0", + "30\u00b0" + ], + "answer": "13\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 80, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "588": { + "question_id": "588", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has more actual total income?", + "choices": null, + "answer": "1982", + "extraction": "1979", + "prediction": "1979", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2281, + "img_width": 1785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "590": { + "question_id": "590", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 264, + "img_width": 376, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "592": { + "question_id": "592", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the global maximum of this function?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 318, + "img_width": 283, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "594": { + "question_id": "594", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the expenditure per student in Jamaica have the greatest increase?", + "choices": null, + "answer": "2005", + "extraction": "2005", + "prediction": "2005", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "596": { + "question_id": "596", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 407, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "598": { + "question_id": "598", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most curved beak species?\nChoices:\n(A) iiki\n(B) swallow-tanager\n(C) cliff swallow\n(D) hawfinch", + "choices": [ + "iiki", + "swallow-tanager", + "cliff swallow", + "hawfinch" + ], + "answer": "iiki", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "iiki", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 463, + "img_width": 593, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "600": { + "question_id": "600", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 637, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "602": { + "question_id": "602", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Rectangle ABCD is subdivided into two identical square regions, as in the figure above. If the area of each square is 9, what is the perimeter of ABCD?", + "choices": null, + "answer": "18", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 219, + "img_width": 435, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "604": { + "question_id": "604", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "606": { + "question_id": "606", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 209, + "img_width": 335, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "608": { + "question_id": "608", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does South Carolina have the highest value in the South ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "610": { + "question_id": "610", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, P, Q, and R lie on the same line. P is the center of the larger circle, and Q is the center of the smaller circle. If the radius of the larger circle is 4, what is the radius of the smaller circle?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 8\n(E) 16", + "choices": [ + "1", + "2", + "4", + "8", + "16" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 411, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "612": { + "question_id": "612", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal things. Subtract all tiny objects. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "614": { + "question_id": "614", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 661, + "img_width": 915, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "616": { + "question_id": "616", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of instagram to google?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "618": { + "question_id": "618", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "620": { + "question_id": "620", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "622": { + "question_id": "622", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cD\u662fBC\u4e0a\u7684\u70b9\uff0c\u4e14BD\uff1d2\uff0cDC\uff1d1\uff0cS\u25b3ACD\uff1d12\uff0c\u90a3\u4e48S\u25b3ABC\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 30\n(B) 36\n(C) 72\n(D) 24", + "choices": [ + "30", + "36", + "72", + "24" + ], + "answer": "36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 146, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "624": { + "question_id": "624", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the total unemployed labor force in Upper middle income greater than 1.6 %?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1344, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "626": { + "question_id": "626", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown objects. Subtract all large purple cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "628": { + "question_id": "628", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u2220ABC\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9E\uff0c\u2220BCD\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9F\uff0c\u82e5AB\uff1d3\uff0cAD\uff1d4\uff0c\u5219EF\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.5\n(D) 3", + "choices": [ + "1", + "2", + "2.5", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "630": { + "question_id": "630", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the size of angle MBD in the figure below.", + "choices": null, + "answer": "72", + "extraction": "62", + "prediction": "62", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 195, + "img_width": 340, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "632": { + "question_id": "632", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total value of the More bar?", + "choices": null, + "answer": "52", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 350, + "img_width": 309, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "634": { + "question_id": "634", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfAB\uff0cCD\u4ea4\u4e8e\u70b9O\uff0e\u5c04\u7ebfOE\u5e73\u5206\u2220BOC\uff0c\u82e5\u2220AOD\uff1d70\u00b0\uff0c\u5219\u2220AOE\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 110\u00b0\n(C) 135\u00b0\n(D) 145\u00b0", + "choices": [ + "35\u00b0", + "110\u00b0", + "135\u00b0", + "145\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 173, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "636": { + "question_id": "636", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "34", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 92, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "638": { + "question_id": "638", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the under-5 male mortality rate greater than the average under-5 male mortality rate taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 880, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "640": { + "question_id": "640", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $\\widehat{\\mathrm{WN}}$ if $\\triangle \\mathrm{IWN}$ is equilateral and $W N=5$\nChoices:\n(A) \\frac { 3 } { 5 } \\pi\n(B) \\frac { 5 } { 3 } \\pi\n(C) 5 \\pi\n(D) 10 \\pi", + "choices": [ + "\\frac { 3 } { 5 } \\pi", + "\\frac { 5 } { 3 } \\pi", + "5 \\pi", + "10 \\pi" + ], + "answer": "\\frac { 5 } { 3 } \\pi", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 3 } { 5 } \\pi", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 222, + "img_width": 309, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "642": { + "question_id": "642", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Line AB is tangent to circle O. If AB = 8 and OB = 10, find the diameter of the circle.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10\n(E) 12", + "choices": [ + "4", + "6", + "8", + "10", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 443, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "644": { + "question_id": "644", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing number in the picture?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 11", + "choices": [ + "6", + "8", + "10", + "11" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 452, + "img_width": 494, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "646": { + "question_id": "646", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The employee at the department store counted the number of ties on each tie rack. How many racks have at least 0 ties? (Unit: racks)", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "648": { + "question_id": "648", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the minimum value of this function?", + "choices": null, + "answer": "-1", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "650": { + "question_id": "650", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of maximum employment rate and minimum employment?", + "choices": null, + "answer": "31.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "652": { + "question_id": "652", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 365, + "img_width": 845, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "654": { + "question_id": "654", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metallic motorbikes that are in front of the small brown metal dirtbike than big yellow dirtbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "656": { + "question_id": "656", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 776, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "658": { + "question_id": "658", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 115, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "660": { + "question_id": "660", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small fighters than yellow matte tandem bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "662": { + "question_id": "662", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "80", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "664": { + "question_id": "664", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number of responses for Question 10, for any given % of inside sales?", + "choices": null, + "answer": "17", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2245, + "img_width": 1692, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "666": { + "question_id": "666", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red objects. Subtract all big green things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "668": { + "question_id": "668", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "670": { + "question_id": "670", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which date of Meeting was the most number of shares transferred?\nChoices:\n(A) 04/06/2005\n(B) 04/02/2005\n(C) 04/05/2005\n(D) 04/03/2005\n(E) 04/04/2005", + "choices": [ + "04/06/2005", + "04/02/2005", + "04/05/2005", + "04/03/2005", + "04/04/2005" + ], + "answer": "04/02/2005", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "04/06/2005", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2135, + "img_width": 1582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "672": { + "question_id": "672", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 169, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "674": { + "question_id": "674", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, CDE is an equilateral triangle and ABCE is a square with an area of 1. What is the perimeter of polygon ABCDE?\nChoices:\n(A) 4\n(B) 5\n(C) 6\n(D) 7\n(E) 8", + "choices": [ + "4", + "5", + "6", + "7", + "8" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "676": { + "question_id": "676", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "678": { + "question_id": "678", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 21\n(B) 34\n(C) 58\n(D) 67", + "choices": [ + "21", + "34", + "58", + "67" + ], + "answer": "58", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 267, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "680": { + "question_id": "680", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 303, + "img_width": 440, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "682": { + "question_id": "682", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the grass dies then population of squirrel will\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remains the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 592, + "img_width": 864, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "684": { + "question_id": "684", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{CH} \\cong \\overline{KJ}$. Find $x$.\nChoices:\n(A) 27\n(B) 54\n(C) 55\n(D) 83", + "choices": [ + "27", + "54", + "55", + "83" + ], + "answer": "55", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "27", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 444, + "img_width": 608, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "686": { + "question_id": "686", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function invertible?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 442, + "img_width": 731, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "688": { + "question_id": "688", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the minimum age group shown in the \u2018plots\u2019?\nChoices:\n(A) 11-15\n(B) 21-25\n(C) 6-10\n(D) 16-20\n(E) 0-5", + "choices": [ + "11-15", + "21-25", + "6-10", + "16-20", + "0-5" + ], + "answer": "0-5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "11-15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2136, + "img_width": 3160, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "690": { + "question_id": "690", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, lines M and N are parallel. All of the following are true except\nChoices:\n(A) a + b = j + l\n(B) g = h\n(C) c + f = f + b\n(D) g + e + f + h = 360\n(E) d + e = f + j", + "choices": [ + "a + b = j + l", + "g = h", + "c + f = f + b", + "g + e + f + h = 360", + "d + e = f + j" + ], + "answer": "d + e = f + j", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a + b = j + l", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 558, + "img_width": 625, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "692": { + "question_id": "692", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain if grasses dried up in summer, what is likely to happen?\nChoices:\n(A) Grasshoppers will decrease.\n(B) shrews will become extinct\n(C) owls will increase.\n(D) None of the above", + "choices": [ + "Grasshoppers will decrease.", + "shrews will become extinct", + "owls will increase.", + "None of the above" + ], + "answer": "Grasshoppers will decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshoppers will decrease.", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 189, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "694": { + "question_id": "694", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u83f1\u5f62ABCD\u4e2d\uff0cM\u3001N\u5206\u522b\u662fBC\u548cCD\u7684\u4e2d\u70b9\uff0cNP\u22a5AB\u4e8e\u70b9P\uff0c\u8fde\u63a5MP\uff0e\u82e5\u2220DAB\uff1d40\u00b0\uff0c\u5219\u2220MPB\uff1d\uff08\uff09\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 115\u00b0\n(D) 110\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "115\u00b0", + "110\u00b0" + ], + "answer": "110\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 158, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "696": { + "question_id": "696", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Erica has $1,525.00. Does she have enough to buy a motorcycle and a canoe?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 214, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "698": { + "question_id": "698", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle in the figure above, what is the value of x?\nChoices:\n(A) 2*\\sqrt{3}\n(B) 6*\\sqrt{2}\n(C) 6*\\sqrt{3}\n(D) 6\n(E) 12", + "choices": [ + "2*\\sqrt{3}", + "6*\\sqrt{2}", + "6*\\sqrt{3}", + "6", + "12" + ], + "answer": "2*\\sqrt{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2*\\sqrt{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 376, + "img_width": 615, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "700": { + "question_id": "700", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u2299O\u662f\u25b3ABC\u7684\u5916\u63a5\u5706\uff0cAB\uff1dBC\uff1d4\uff0c\u628a\u5f27AB\u6cbf\u5f26AB\u5411\u4e0b\u6298\u53e0\u4ea4BC\u4e8e\u70b9D\uff0c\u82e5\u70b9D\u4e3aBC\u4e2d\u70b9\uff0c\u5219AC\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2\u221a{2}\n(D) \u221a{6}", + "choices": [ + "1", + "2", + "2\u221a{2}", + "\u221a{6}" + ], + "answer": "2\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "702": { + "question_id": "702", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP A\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "400", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "704": { + "question_id": "704", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two puzzle pieces form the larger square?\nChoices:\n(A) 1 & 2\n(B) 1 & 3\n(C) 1 & 4\n(D) 2 & 3\n(E) 2 & 4", + "choices": [ + "1 & 2", + "1 & 3", + "1 & 4", + "2 & 3", + "2 & 4" + ], + "answer": "1 & 3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1 & 2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 440, + "img_width": 396, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "706": { + "question_id": "706", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the image of the dot (8,-2) under a clockwise rotation by 270\u00b0 about the origin.\"\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 432, + "img_width": 438, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "708": { + "question_id": "708", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB \u2225 CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().\nChoices:\n(A) \\frac{6}{5}\n(B) \\frac{7}{6}\n(C) \\frac{9}{5}\n(D) \\frac{15}{2}", + "choices": [ + "\\frac{6}{5}", + "\\frac{7}{6}", + "\\frac{9}{5}", + "\\frac{15}{2}" + ], + "answer": "\\frac{9}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{6}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 156, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "710": { + "question_id": "710", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1555, + "img_width": 2293, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "712": { + "question_id": "712", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 244, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "714": { + "question_id": "714", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large brown rubber motorbikes in front of the big motorbike greater than the number of big green sedans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "716": { + "question_id": "716", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 16 \\sqrt { 2 }\n(B) 16 \\sqrt { 3 }\n(C) 32\n(D) 16 \\sqrt { 5 }", + "choices": [ + "16 \\sqrt { 2 }", + "16 \\sqrt { 3 }", + "32", + "16 \\sqrt { 5 }" + ], + "answer": "16 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16 \\sqrt { 2 }", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 196, + "img_width": 427, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "718": { + "question_id": "718", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Jeffrey is the proud owner of an eclectic bow tie collection. He keeps track of how many bow ties he has, and organizes them by pattern and material. What is the probability that a randomly selected bow tie is designed with swirls and is made of velvet? Simplify any fractions.'", + "choices": null, + "answer": "0.21", + "extraction": "0.33", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 94, + "img_width": 215, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "720": { + "question_id": "720", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function value first reach 2?", + "choices": null, + "answer": "2", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 362, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "722": { + "question_id": "722", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "724": { + "question_id": "724", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rebecca Purple have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 638, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "726": { + "question_id": "726", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 10\n(B) 30\n(C) 90\n(D) 120", + "choices": [ + "10", + "30", + "90", + "120" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 228, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "728": { + "question_id": "728", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 69, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "730": { + "question_id": "730", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year the market share of KLA is highest?", + "choices": null, + "answer": "2019", + "extraction": "2013", + "prediction": "2013", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "732": { + "question_id": "732", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism would be most affected if there was a shortage of plants?\nChoices:\n(A) Grasshopper\n(B) Snake\n(C) Mouse\n(D) Hawk", + "choices": [ + "Grasshopper", + "Snake", + "Mouse", + "Hawk" + ], + "answer": "Grasshopper", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshopper", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "734": { + "question_id": "734", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer double buss that are behind the aeroplane than things on the left side of the yellow double bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "736": { + "question_id": "736", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u76f4\u7ebfa\u2225b\uff0c\u76f4\u89d2\u4e09\u89d2\u5f62ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5\u2220B\uff1d58\u00b0\uff0c\u90a3\u4e48\u22201\ufe63\u22202\uff1d\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 58\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "58\u00b0" + ], + "answer": "32\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 154, + "img_width": 226, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "738": { + "question_id": "738", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 268, + "img_width": 383, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "740": { + "question_id": "740", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What percent of the stands are full?\nChoices:\n(A) 15\n(B) 100\n(C) 50\n(D) 50", + "choices": [ + "15", + "100", + "50", + "50" + ], + "answer": "15", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "742": { + "question_id": "742", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 159, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "744": { + "question_id": "744", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If RL = 5, RT = 9, and WS = 6, find RW.\nChoices:\n(A) 5.4\n(B) 6\n(C) 6.6\n(D) 7.5", + "choices": [ + "5.4", + "6", + "6.6", + "7.5" + ], + "answer": "7.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 404, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "746": { + "question_id": "746", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mrs. Zimmerman hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between 2013 and 2014? (Unit: entries per year)", + "choices": null, + "answer": "7", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 199, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "748": { + "question_id": "748", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangents of \u2299O, the tangent point of point A and B, AC is the diameter of \u2299O, given that \u2220P = 50.0, then the size of \u2220ACB is ()\nChoices:\n(A) 65\u00b0\n(B) 60\u00b0\n(C) 55\u00b0\n(D) 50\u00b0", + "choices": [ + "65\u00b0", + "60\u00b0", + "55\u00b0", + "50\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 207, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "750": { + "question_id": "750", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "18", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 356, + "img_width": 290, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "752": { + "question_id": "752", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cPA\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5207\u70b9\u4e3aA\uff0cOP\uff1d4\uff0c\u2220APO\uff1d30\u00b0\uff0c\u5219\u2299O\u7684\u534a\u5f84\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 3", + "choices": [ + "1", + "\u221a{3}", + "2", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 122, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "754": { + "question_id": "754", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. What will happen to fish population if algae's are decreased?\nChoices:\n(A) Population will decrease\n(B) Population will remain the same\n(C) Population will increase\n(D) None of the above", + "choices": [ + "Population will decrease", + "Population will remain the same", + "Population will increase", + "None of the above" + ], + "answer": "Population will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Population will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 364, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "756": { + "question_id": "756", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the trees died, the population of porcupine would most likely\nChoices:\n(A) double\n(B) skyrocket\n(C) decrease\n(D) increase", + "choices": [ + "double", + "skyrocket", + "decrease", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "double", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 591, + "img_width": 765, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "758": { + "question_id": "758", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny purple trucks behind the small matte motorbike less than the number of fighters that are behind the big metal utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "760": { + "question_id": "760", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow tandem bikes less than the number of big objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "762": { + "question_id": "762", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of symmetry of this function?\nChoices:\n(A) (0, 0)\n(B) (-1, 0)\n(C) (2, 0)", + "choices": [ + "(0, 0)", + "(-1, 0)", + "(2, 0)" + ], + "answer": "(0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "764": { + "question_id": "764", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of bananas on each stock?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 349, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "766": { + "question_id": "766", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red trucks than small blue bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "768": { + "question_id": "768", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the hottest on average in Rome?\nChoices:\n(A) December, January, and February\n(B) July and August\n(C) March and April", + "choices": [ + "December, January, and February", + "July and August", + "March and April" + ], + "answer": "July and August", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "December, January, and February", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 448, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "770": { + "question_id": "770", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the amplitude of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "772": { + "question_id": "772", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow shiny motorbikes greater than the number of red rubber fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "774": { + "question_id": "774", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large matte utility bikes than small yellow bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "776": { + "question_id": "776", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $JQ$ if $Q$ is the incenter of $\\triangle JLN$. Rounded to the nearest hundredth.\nChoices:\n(A) 16.50\n(B) 18.79\n(C) 20.32\n(D) 25.50", + "choices": [ + "16.50", + "18.79", + "20.32", + "25.50" + ], + "answer": "18.79", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16.50", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 424, + "img_width": 589, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "778": { + "question_id": "778", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you find the missing shape in this picture puzzle?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 431, + "img_width": 797, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "780": { + "question_id": "780", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 209, + "img_width": 848, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "782": { + "question_id": "782", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "4", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 376, + "img_width": 384, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "784": { + "question_id": "784", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Across all years, what is the maximum rating of statistical capacity in Maldives ?", + "choices": null, + "answer": "70", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 938, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "786": { + "question_id": "786", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle K$\nChoices:\n(A) 6\n(B) 60\n(C) 100\n(D) 180", + "choices": [ + "6", + "60", + "100", + "180" + ], + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 317, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "788": { + "question_id": "788", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 332, + "img_width": 515, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "790": { + "question_id": "790", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cN\u662fBC\u8fb9\u4e0a\u7684\u4e2d\u70b9\uff0cAM\u5e73\u5206\u2220BAC\uff0cBM\u22a5AM\u4e8e\u70b9M\uff0c\u82e5AB\uff1d8\uff0cMN\uff1d2\uff0e\u5219AC\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 145, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "792": { + "question_id": "792", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2624, + "img_width": 3936, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "794": { + "question_id": "794", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 4?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "796": { + "question_id": "796", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1938, + "img_width": 2516, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "798": { + "question_id": "798", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, l || m. Which of the following must equal 180?\nChoices:\n(A) k + n + r\n(B) k + p + s\n(C) n + p + s\n(D) n + p + t\n(E) r + s + t", + "choices": [ + "k + n + r", + "k + p + s", + "n + p + s", + "n + p + t", + "r + s + t" + ], + "answer": "k + p + s", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "k + n + r", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 372, + "img_width": 371, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "800": { + "question_id": "800", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid intersect Forest Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 596, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "802": { + "question_id": "802", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Karen bought 4 pounds of silk scraps and 4 pounds of canvas scraps. How much did she spend? (Unit: $)", + "choices": null, + "answer": "69", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 194, + "img_width": 243, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "804": { + "question_id": "804", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot B$, $CE=13.5$. Find $BD$. Round to the nearest hundredth.\nChoices:\n(A) 3.71\n(B) 4.29\n(C) 4.53\n(D) 6.75", + "choices": [ + "3.71", + "4.29", + "4.53", + "6.75" + ], + "answer": "4.29", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.71", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 524, + "img_width": 493, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "806": { + "question_id": "806", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and point C is on \u2299O. If \u2220A = 40.0, then the degree of \u2220B is ()\nChoices:\n(A) 80\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 40\u00b0", + "choices": [ + "80\u00b0", + "60\u00b0", + "50\u00b0", + "40\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "808": { + "question_id": "808", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large purple spheres. Subtract all small gray things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "810": { + "question_id": "810", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow metallic balls. Subtract all small yellow shiny things. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "812": { + "question_id": "812", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the gray bar always have smaller value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 1286, + "img_width": 840, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "814": { + "question_id": "814", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "100000000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "816": { + "question_id": "816", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth, if necessary.\nChoices:\n(A) 3\n(B) 9\n(C) 12.25\n(D) 24", + "choices": [ + "3", + "9", + "12.25", + "24" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 272, + "img_width": 379, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "818": { + "question_id": "818", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of least value of light brown graph and leftmost value of dark brown graph?", + "choices": null, + "answer": "0.32", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 434, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "820": { + "question_id": "820", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $a=14, b=48,$ and $c=50$ find $cosA$\nChoices:\n(A) 0.14\n(B) 0.48\n(C) 0.50\n(D) 0.96", + "choices": [ + "0.14", + "0.48", + "0.50", + "0.96" + ], + "answer": "0.96", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "822": { + "question_id": "822", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 22\n(B) 40\n(C) 44\n(D) 48", + "choices": [ + "22", + "40", + "44", + "48" + ], + "answer": "44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "22", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 356, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "824": { + "question_id": "824", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)", + "choices": null, + "answer": "0.13", + "extraction": "0.97", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "826": { + "question_id": "826", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the largest part of the lung?\nChoices:\n(A) Inferior lobes\n(B) Cardiac notch\n(C) Superior lobes\n(D) Middle lobe", + "choices": [ + "Inferior lobes", + "Cardiac notch", + "Superior lobes", + "Middle lobe" + ], + "answer": "Superior lobes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Inferior lobes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 479, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "828": { + "question_id": "828", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Linda wants to buy 0.9 pounds of double chocolate cookie dough. How much will she spend? (Unit: $)", + "choices": null, + "answer": "2.7", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 357, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "830": { + "question_id": "830", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 870, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "832": { + "question_id": "832", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "-2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "834": { + "question_id": "834", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Georgia , does Florida have the lowest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 610, + "img_width": 785, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "836": { + "question_id": "836", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest species shown?\nChoices:\n(A) chinlea\n(B) arganodus\n(C) semionotus\n(D) xenacanthus", + "choices": [ + "chinlea", + "arganodus", + "semionotus", + "xenacanthus" + ], + "answer": "semionotus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "chinlea", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1076, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "838": { + "question_id": "838", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1200, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "840": { + "question_id": "840", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which item can you get the most protein?\nChoices:\n(A) salami\n(B) wine\n(C) cheese\n(D) bread", + "choices": [ + "salami", + "wine", + "cheese", + "bread" + ], + "answer": "salami", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "salami", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 375, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "842": { + "question_id": "842", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $\\frac{2.0}{3.0}$hours to reach point B, then tan\u2220BAP = ()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{6}{5}\n(C) \\frac{\u221a{5}}{5}\n(D) \\frac{2\u221a{5}}{5}", + "choices": [ + "\\frac{4}{5}", + "\\frac{6}{5}", + "\\frac{\u221a{5}}{5}", + "\\frac{2\u221a{5}}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{5}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 115, + "img_width": 154, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "844": { + "question_id": "844", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the larger window shaped like the smaller window?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "846": { + "question_id": "846", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 758, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "848": { + "question_id": "848", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the tuberculosis treatment success rate in Bulgaria greater than the average tuberculosis treatment success rate in Bulgaria taken over all years ?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1091, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "850": { + "question_id": "850", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of cars in front of the tiny metal thing less than the number of large matte things in front of the cyan rubber road bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "852": { + "question_id": "852", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "40", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 598, + "img_width": 612, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "854": { + "question_id": "854", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the pelicans in the community were eradicated, which population feel the most direct effect?\nChoices:\n(A) Plant\n(B) Phyto-plankton\n(C) Fish\n(D) Lizard", + "choices": [ + "Plant", + "Phyto-plankton", + "Fish", + "Lizard" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "856": { + "question_id": "856", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture has the least leaves?\nChoices:\n(A) Both\n(B) Compound\n(C) Simple\n(D) Neither", + "choices": [ + "Both", + "Compound", + "Simple", + "Neither" + ], + "answer": "Simple", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Both", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "858": { + "question_id": "858", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the basis of the given food web, which organism will increase in number if there were no seals?\nChoices:\n(A) Shark\n(B) Small Shrimp\n(C) Octopus\n(D) Mysid Shrimp", + "choices": [ + "Shark", + "Small Shrimp", + "Octopus", + "Mysid Shrimp" + ], + "answer": "Octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Shark", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "860": { + "question_id": "860", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Miss Foley ran a sit-up competition among her P.E. students and monitored how many sit-ups each students could do. What is the largest number of sit-ups done? (Unit: sit-ups)", + "choices": null, + "answer": "86", + "extraction": "256", + "prediction": "256", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 246, + "img_width": 291, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "862": { + "question_id": "862", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: One of the most dramatic videos on the web (but entirely fictitious) supposedly shows a man sliding along a long water slide and then being launched into the air to land in a water pool. Let's attach some reasonable numbers to such a flight to calculate the velocity with which the man would have hit the water. Figure indicates the launch and landing sites and includes a superimposed coordinate system with its origin conveniently located at the launch site. From the video we take the horizontal flight distance as $D=20.0 \\mathrm{~m}$, the flight time as $t=2.50 \\mathrm{~s}$, and the launch angle as $\\theta_0=40.0^{\\circ}$. Find the magnitude of the velocity at launch and at landing.", + "choices": null, + "answer": "10.44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 600, + "img_width": 1302, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "864": { + "question_id": "864", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1738, + "img_width": 2480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "866": { + "question_id": "866", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For trapezoid $Q R S T, A$ and $B$ are midpoints of the legs. Find $m \\angle S$\nChoices:\n(A) 45\n(B) 60\n(C) 120\n(D) 135", + "choices": [ + "45", + "60", + "120", + "135" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 169, + "img_width": 359, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "868": { + "question_id": "868", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green cylinders. Subtract all rubber cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "870": { + "question_id": "870", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny motorbikes in front of the small cyan tandem bike than big cyan metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "872": { + "question_id": "872", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the next shape.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 496, + "img_width": 1472, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "874": { + "question_id": "874", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of y at x=-2.5?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "876": { + "question_id": "876", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, square $ABDC$ is inscribed in $\\odot K$. Find the measure of a central angle.\nChoices:\n(A) 45\n(B) 60\n(C) 90\n(D) 180", + "choices": [ + "45", + "60", + "90", + "180" + ], + "answer": "90", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 275, + "img_width": 273, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "878": { + "question_id": "878", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u9762\u79ef\u5206\u522b\u4e3aS1\uff0cS2\uff0cS3\uff0c\u4e14S1\uff1d5\uff0cS3\uff1d16\uff0c\u5219S2\uff1d\uff08\uff09\nChoices:\n(A) 6\n(B) 2\u221a{2}\n(C) 11\n(D) 24", + "choices": [ + "6", + "2\u221a{2}", + "11", + "24" + ], + "answer": "11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 94, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "880": { + "question_id": "880", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the total add up value of largest and smallest bar?", + "choices": null, + "answer": "252.65", + "extraction": "2.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "882": { + "question_id": "882", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "884": { + "question_id": "884", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue kite in the lower right corner shaped like?\nChoices:\n(A) ferret\n(B) cat\n(C) cloud\n(D) octopus", + "choices": [ + "ferret", + "cat", + "cloud", + "octopus" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ferret", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "886": { + "question_id": "886", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A newspaper researched how many grocery stores there are in each town. What is the median of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "888": { + "question_id": "888", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green shiny balls. Subtract all small metallic things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "890": { + "question_id": "890", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is larger the moon or the sun?\nChoices:\n(A) Sun\n(B) It varies\n(C) They are equal in size\n(D) Moon", + "choices": [ + "Sun", + "It varies", + "They are equal in size", + "Moon" + ], + "answer": "Sun", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Sun", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 844, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "892": { + "question_id": "892", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does New Jersey have a higher value than Georgia ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "894": { + "question_id": "894", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms fat and acre?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "896": { + "question_id": "896", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately, what percentage of jewelry sales in January were Rings?\nChoices:\n(A) Around 21%\n(B) Around 27%\n(C) Around 31%\n(D) Around 37%", + "choices": [ + "Around 21%", + "Around 27%", + "Around 31%", + "Around 37%" + ], + "answer": "Around 31%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Around 21%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "elementary school", + "img_height": 464, + "img_width": 758, + "language": "english", + "skills": [ + "logical reasoning", + "statistical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "898": { + "question_id": "898", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, and C are the three points on \u2299O, if \u2220C = 35.0, then the degree of \u2220OAB is ()\nChoices:\n(A) 35\u00b0\n(B) 55\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "35\u00b0", + "55\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 109, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "900": { + "question_id": "900", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rubber cars less than the number of brown jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "902": { + "question_id": "902", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the leaf base has an angle greater than 90 degrees, what is it called?\nChoices:\n(A) obtuse\n(B) decurrent\n(C) cuneate\n(D) acute", + "choices": [ + "obtuse", + "decurrent", + "cuneate", + "acute" + ], + "answer": "obtuse", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "obtuse", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1429, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "904": { + "question_id": "904", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "906": { + "question_id": "906", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two value is greater then then largest value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "908": { + "question_id": "908", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which organism would most likely have a decrease in its population if decrease the population of ant base of above diagram?\nChoices:\n(A) plant\n(B) human\n(C) lizard\n(D) snake", + "choices": [ + "plant", + "human", + "lizard", + "snake" + ], + "answer": "lizard", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 497, + "img_width": 312, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "910": { + "question_id": "910", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal balls. Subtract all large matte things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "912": { + "question_id": "912", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 413, + "img_width": 629, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "914": { + "question_id": "914", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny purple shiny cubes. Subtract all large purple balls. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "916": { + "question_id": "916", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, \u2220A = 30.0, BC = 2.0, the radius of \u2299C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of \u2299C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()\nChoices:\n(A) 2\n(B) \u221a{3}\n(C) \u221a{2}\n(D) 2-\\frac{\u221a{3}}{3}", + "choices": [ + "2", + "\u221a{3}", + "\u221a{2}", + "2-\\frac{\u221a{3}}{3}" + ], + "answer": "\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 145, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "918": { + "question_id": "918", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "1", + "extraction": "23", + "prediction": "23", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 492, + "img_width": 538, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "920": { + "question_id": "920", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The measure of angle BAC equals x*\\degree. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 388, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "922": { + "question_id": "922", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual element in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "924": { + "question_id": "924", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Periwinkle have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "926": { + "question_id": "926", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the shaded area under the curve? Round the answer to 2 decimal places", + "choices": null, + "answer": "7.07", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "928": { + "question_id": "928", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more does a navy blue bath mat cost than a yellow bath towel? (Unit: $)", + "choices": null, + "answer": "5", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 234, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "930": { + "question_id": "930", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cF\u662f\u25b3ABC\u7684\u89d2\u5e73\u5206\u7ebfCD\u548cBE\u7684\u4ea4\u70b9\uff0cCG\u22a5AB\u4e8e\u70b9G\uff0e\u82e5\u2220ACG\uff1d32\u00b0\uff0c\u5219\u2220BFC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 119\u00b0\n(B) 122\u00b0\n(C) 148\u00b0\n(D) 150\u00b0", + "choices": [ + "119\u00b0", + "122\u00b0", + "148\u00b0", + "150\u00b0" + ], + "answer": "119\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "119\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 113, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "932": { + "question_id": "932", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the phytoplankton if krill increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't be predicted\n(D) stay the same", + "choices": [ + "decrease", + "increase", + "can't be predicted", + "stay the same" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 350, + "img_width": 750, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "934": { + "question_id": "934", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "10000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "936": { + "question_id": "936", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 892, + "img_width": 710, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "938": { + "question_id": "938", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m \u22209 = 75$. Find the measure of $\\angle 6$.\nChoices:\n(A) 75\n(B) 85\n(C) 95\n(D) 105", + "choices": [ + "75", + "85", + "95", + "105" + ], + "answer": "105", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 278, + "img_width": 417, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "940": { + "question_id": "940", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red things. Subtract all metallic things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "942": { + "question_id": "942", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "0", + "extraction": "-10", + "prediction": "-10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "944": { + "question_id": "944", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "946": { + "question_id": "946", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 373, + "img_width": 560, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "948": { + "question_id": "948", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some students compared how many blocks they live from school. What is the mean of the numbers?'", + "choices": null, + "answer": "11", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 207, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "950": { + "question_id": "950", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The slope of f(x) at x=0 is ____\nChoices:\n(A) positive\n(B) negative\n(C) zero\n(D) undefined", + "choices": [ + "positive", + "negative", + "zero", + "undefined" + ], + "answer": "positive", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "positive", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "952": { + "question_id": "952", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the food web below and on your knowledge of biology. A decrease in the Aquatic crustaceans population will most immediately decrease the available energy for the\nChoices:\n(A) Minnows\n(B) Ducks\n(C) Fish\n(D) Raccoons", + "choices": [ + "Minnows", + "Ducks", + "Fish", + "Raccoons" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Minnows", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "954": { + "question_id": "954", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A partial food web is shown below. Which of the following will most likely happen if the snake population decreases?\nChoices:\n(A) Cricket will increase\n(B) Mouse will increase\n(C) Rabbit will increase\n(D) All of above", + "choices": [ + "Cricket will increase", + "Mouse will increase", + "Rabbit will increase", + "All of above" + ], + "answer": "All of above", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Cricket will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 277, + "img_width": 475, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "956": { + "question_id": "956", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small blue rubber objects. Subtract all brown shiny balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "958": { + "question_id": "958", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the missing letters from below to form a word, using all letters presented\nChoices:\n(A) A, R, N\n(B) R, D, N\n(C) I, A, M\n(D) H, O, W", + "choices": [ + "A, R, N", + "R, D, N", + "I, A, M", + "H, O, W" + ], + "answer": "R, D, N", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "A, R, N", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 773, + "img_width": 945, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "960": { + "question_id": "960", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1365, + "img_width": 2048, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "962": { + "question_id": "962", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of y at x=10 is ____ that at x=70.\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 301, + "img_width": 387, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "964": { + "question_id": "964", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 70, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "966": { + "question_id": "966", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 166, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "968": { + "question_id": "968", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue balls. Subtract all big yellow rubber balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "970": { + "question_id": "970", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e24\u76f4\u7ebfa\uff0cb\u88ab\u76f4\u7ebfc\u6240\u622a\uff0c\u5df2\u77e5a\u2225b\uff0c\u22201\uff1d62\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 62\u00b0\n(B) 108\u00b0\n(C) 118\u00b0\n(D) 128\u00b0", + "choices": [ + "62\u00b0", + "108\u00b0", + "118\u00b0", + "128\u00b0" + ], + "answer": "118\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "62\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 135, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "972": { + "question_id": "972", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow shiny utility bikes greater than the number of brown metallic cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "974": { + "question_id": "974", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there the same number of big blue trucks and large purple metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "976": { + "question_id": "976", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal biplanes behind the purple shiny object less than the number of purple school buss behind the big red object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "978": { + "question_id": "978", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Allie kept a written log of how many miles she biked during the past 7 days. What is the range of the numbers?'", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "980": { + "question_id": "980", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number shown?", + "choices": null, + "answer": "12", + "extraction": "12", + "prediction": "12", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 429, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "982": { + "question_id": "982", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Wyoming , does South Dakota have the highest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "984": { + "question_id": "984", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray cars less than the number of small metallic minivans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "986": { + "question_id": "986", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAD\u662f\u89d2\u5e73\u5206\u7ebf\uff0cAE\u662f\u9ad8\uff0e\u82e5\u2220B\uff1d40\u00b0\uff0c\u2220C\uff1d70\u00b0\uff0c\u5219\u2220EAD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 10\u00b0\n(B) 15\u00b0\n(C) 17.5\u00b0\n(D) 20\u00b0", + "choices": [ + "10\u00b0", + "15\u00b0", + "17.5\u00b0", + "20\u00b0" + ], + "answer": "15\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 101, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "988": { + "question_id": "988", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "990": { + "question_id": "990", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot S$, $m \\widehat {PQR}=98$, Find $m \\widehat {PQ}$.\nChoices:\n(A) 45\n(B) 49\n(C) 90\n(D) 98", + "choices": [ + "45", + "49", + "90", + "98" + ], + "answer": "49", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 452, + "img_width": 544, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "992": { + "question_id": "992", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of purple metallic things that are behind the small green motorbike less than the number of blue metal articulated buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "994": { + "question_id": "994", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta greater than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 548, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "996": { + "question_id": "996", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big shiny balls. Subtract all blue rubber blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "998": { + "question_id": "998", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff1a\u2220AOB\uff1a\u2220BOC\uff1a\u2220COD\uff1d2\uff1a3\uff1a4\uff0c\u5c04\u7ebfOM\u3001ON\uff0c\u5206\u522b\u5e73\u5206\u2220AOB\u4e0e\u2220COD\uff0c\u53c8\u2220MON\uff1d84\u00b0\uff0c\u5219\u2220AOB\u4e3a\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 38\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "38\u00b0" + ], + "answer": "28\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 181, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "1000": { + "question_id": "1000", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte cylinders. Subtract all big purple matte things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + } +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json new file mode 100644 index 0000000000000000000000000000000000000000..e4a5b6959b244a4947af5202e3867aedea6183ec --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c597f6e7990da8a6152fa835b8fb94f55ffdf7abd8c89e3f77272a9574aa9099 +size 45275763 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json new file mode 100644 index 0000000000000000000000000000000000000000..9e71e36e5eeaa79d3b6f043cb8342a7e51dccade --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1013eec1a07465ff759dc3031c2921cb4081bcc386e98c43120cc47847ee69 +size 94631509 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..d6b9dbd54f27bcca4d9f3299cd9281690ab317e0 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4026a47ec71284e1d594a154cb844d5aeaba76ce68754ec9b86d52078f260021 +size 36750611 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json new file mode 100644 index 0000000000000000000000000000000000000000..fefe948b556109b8aa8afa2c1fadcea64c6dd0bc --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccf81f0fb7a589e721f551930e15431159b8a722590a0146130265df917a3ca +size 60427594 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fa53948365f266972435a1d9d5d3782bf85bbd43 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/results.json @@ -0,0 +1,285 @@ +{ + "results": { + "mathvista_testmini": { + "gpt_eval_score,none": 24.0, + "gpt_eval_score_stderr,none": "N/A", + "alias": "mathvista_testmini" + }, + "mme": { + "mme_cognition_score,none": 324.2857142857143, + "mme_cognition_score_stderr,none": "N/A", + "mme_percetion_score,none": 1410.4883953581434, + "mme_percetion_score_stderr,none": "N/A", + "alias": "mme" + }, + "mmmu_val": { + "mmmu_acc,none": 0.42222, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + }, + "mmstar": { + "coarse perception,none": 0.6964404085200243, + "coarse perception_stderr,none": "N/A", + "fine-grained perception,none": 0.3732928027511668, + "fine-grained perception_stderr,none": "N/A", + "instance reasoning,none": 0.5295256540272851, + "instance reasoning_stderr,none": "N/A", + "logical reasoning,none": 0.375954518528776, + "logical reasoning_stderr,none": "N/A", + "math,none": 0.3063853247794707, + "math_stderr,none": "N/A", + "science & technology,none": 0.23852115038371227, + "science & technology_stderr,none": "N/A", + "alias": "mmstar" + } + }, + "configs": { + "mathvista_testmini": { + "task": "mathvista_testmini", + "dataset_path": "AI4Math/MathVista", + "dataset_kwargs": { + "token": true + }, + "test_split": "testmini", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "gpt_eval_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "ASSISTANT:" + ], + "max_new_tokens": 1024, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "model_specific_prompt_kwargs": { + "default": { + "shot_type": "format-prompt", + "shot": 0, + "use_caption": false, + "use_ocr": false + }, + "phi3v": { + "shot_type": "solution" + } + }, + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mme": { + "task": "mme", + "dataset_path": "lmms-lab/MME", + "dataset_kwargs": { + "token": false + }, + "test_split": "test", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mme_percetion_score", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "mme_cognition_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 16, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "until": [ + "\n\n" + ] + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question using a single word or phrase." + }, + "gpt4v": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question with Yes or No." + }, + "qwen_vl": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "otterhd": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "xcomposer2_4khd": { + "pre_prompt": "[UNUSED_TOKEN_146]user\n", + "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n" + } + } + }, + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mmstar": { + "task": "mmstar", + "dataset_path": "Lin-Chen/MMStar", + "dataset_kwargs": { + "token": true + }, + "test_split": "val", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "coarse perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "fine-grained perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "instance reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "logical reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "science & technology", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "math", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "\n\n" + ], + "do_sample": false + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer with the option's letter from the given choices directly" + } + } + } + }, + "versions": { + "mathvista_testmini": "Yaml", + "mme": "Yaml", + "mmmu_val": "Yaml", + "mmstar": "Yaml" + }, + "n-shot": { + "mathvista_testmini": 0, + "mme": 0, + "mmmu_val": 0, + "mmstar": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-16636,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/submissions/mathvista_testmini_scores.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/submissions/mathvista_testmini_scores.json new file mode 100644 index 0000000000000000000000000000000000000000..9d52890c1d00b0a370ebe56567c99d6b55708642 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/submissions/mathvista_testmini_scores.json @@ -0,0 +1,26873 @@ +{ + "1": { + "question_id": "1", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", + "choices": null, + "answer": "1.2", + "extraction": "0.1", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 720, + "img_width": 1514, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "3": { + "question_id": "3", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u25b3ABC\u7684\u4e24\u5185\u89d2\u5e73\u5206\u7ebfOB\u3001OC\u76f8\u4ea4\u4e8e\u70b9O\uff0c\u82e5\u2220A\uff1d110\u00b0\uff0c\u5219\u2220BOC\uff1d\uff08\uff09\nChoices:\n(A) 135\u00b0\n(B) 140\u00b0\n(C) 145\u00b0\n(D) 150\u00b0", + "choices": [ + "135\u00b0", + "140\u00b0", + "145\u00b0", + "150\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "135\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 60, + "img_width": 131, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "5": { + "question_id": "5", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m\\angle H$\nChoices:\n(A) 97\n(B) 102\n(C) 107\n(D) 122", + "choices": [ + "97", + "102", + "107", + "122" + ], + "answer": "97", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "97", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 245, + "img_width": 322, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "7": { + "question_id": "7", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "9": { + "question_id": "9", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u662f\u4e00\u682a\u7f8e\u4e3d\u7684\u52fe\u80a1\u6811\uff0c\u5176\u4e2d\u6240\u6709\u56db\u8fb9\u5f62\u90fd\u662f\u6b63\u65b9\u5f62\uff0c\u6240\u6709\u7684\u4e09\u89d2\u5f62\u90fd\u662f\u76f4\u89d2\u4e09\u89d2\u5f62\uff0c\u82e5\u6b63\u65b9\u5f62A\u3001B\u7684\u9762\u79ef\u5206\u522b\u4e3a5\u30013\uff0c\u5219\u6700\u5927\u6b63\u65b9\u5f62C\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 15\n(B) 13\n(C) 11\n(D) 8", + "choices": [ + "15", + "13", + "11", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 155, + "img_width": 134, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "11": { + "question_id": "11", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all tiny matte balls. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "13": { + "question_id": "13", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 90 percent of people in at least one category?", + "choices": null, + "answer": "0", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "15": { + "question_id": "15", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism with be most affected if algae was eliminated?\nChoices:\n(A) Tilapia\n(B) Common water flea\n(C) Great diving beetle\n(D) Tadpole", + "choices": [ + "Tilapia", + "Common water flea", + "Great diving beetle", + "Tadpole" + ], + "answer": "Common water flea", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Tilapia", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 232, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "17": { + "question_id": "17", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0cD\u662fAB\u7684\u4e2d\u70b9\uff0cAB\uff1d10\uff0c\u5219CD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10", + "choices": [ + "5", + "6", + "8", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 172, + "img_width": 125, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "19": { + "question_id": "19", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest amount this class measures?", + "choices": null, + "answer": "400", + "extraction": "400", + "prediction": "400", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 684, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "21": { + "question_id": "21", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "23": { + "question_id": "23", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=2 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "25": { + "question_id": "25", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the smoothest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 770, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "27": { + "question_id": "27", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "11", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1752, + "img_width": 2628, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "29": { + "question_id": "29", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 440, + "img_width": 670, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "31": { + "question_id": "31", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more big red rubber double buss in front of the large red double bus than big green things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "33": { + "question_id": "33", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()\nChoices:\n(A) 2\u03c0cm\n(B) 3\u03c0cm\n(C) 4\u03c0cm\n(D) 5\u03c0cm", + "choices": [ + "2\u03c0cm", + "3\u03c0cm", + "4\u03c0cm", + "5\u03c0cm" + ], + "answer": "4\u03c0cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2\u03c0cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 331, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "35": { + "question_id": "35", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cEF\uff0cEB\u662f\u2299O\u7684\u5f26\uff0c\u70b9E\u662fFEB\u7684\u4e2d\u70b9\uff0cEF\u4e0eAB\u4ea4\u4e8e\u70b9C\uff0c\u8fde\u63a5OF\uff0c\u82e5\u2220AOF\uff1d40\u00b0\uff0c\u5219\u2220F\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 55\u00b0", + "choices": [ + "20\u00b0", + "35\u00b0", + "40\u00b0", + "55\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "37": { + "question_id": "37", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit as x approaches -1?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 410, + "img_width": 408, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "39": { + "question_id": "39", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function odd or even?\nChoices:\n(A) odd\n(B) even", + "choices": [ + "odd", + "even" + ], + "answer": "odd", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "odd", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 304, + "img_width": 433, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "41": { + "question_id": "41", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 3491, + "img_width": 5236, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "43": { + "question_id": "43", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the wettest on average in Christchurch?\nChoices:\n(A) August\n(B) April\n(C) May", + "choices": [ + "August", + "April", + "May" + ], + "answer": "May", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "August", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 449, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "45": { + "question_id": "45", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An administrator at the Department of Motor Vehicles (DMV) tracked the average wait time from month to month. According to the table, what was the rate of change between August and September? (Unit: minutes per month)", + "choices": null, + "answer": "-3", + "extraction": "-1", + "prediction": "-1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "47": { + "question_id": "47", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all rubber balls. Subtract all yellow shiny things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "49": { + "question_id": "49", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the digits on either end of the sign in the corner?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 476, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "51": { + "question_id": "51", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber objects in front of the small yellow aeroplane greater than the number of big cyan matte fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "53": { + "question_id": "53", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 593, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "55": { + "question_id": "55", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e00\u5757\u76f4\u89d2\u4e09\u89d2\u677f60\u00b0\u7684\u89d2\u7684\u9876\u70b9A\u4e0e\u76f4\u89d2\u9876\u70b9C\u5206\u522b\u5728\u4e24\u5e73\u884c\u7ebfFG\uff0cDE\u4e0a\uff0c\u659c\u8fb9AB\u5e73\u5206\u2220CAG\uff0c\u4ea4\u76f4\u7ebfDE\u4e8e\u70b9H\uff0c\u5219\u2220BCH\u7684\u5927\u5c0f\u4e3a\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 45\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "60\u00b0", + "45\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "30\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 175, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "57": { + "question_id": "57", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small balls. Subtract all blue rubber things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "59": { + "question_id": "59", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, CD is the chord of \u2299O, \u2220ADC = 26.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 26\u00b0\n(B) 74\u00b0\n(C) 64\u00b0\n(D) 54\u00b0", + "choices": [ + "26\u00b0", + "74\u00b0", + "64\u00b0", + "54\u00b0" + ], + "answer": "64\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "61": { + "question_id": "61", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 427, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "63": { + "question_id": "63", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red matte cubes. Subtract all small green metal objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "65": { + "question_id": "65", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is f(3) > 0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "67": { + "question_id": "67", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the square?", + "choices": null, + "answer": "16", + "extraction": "16", + "prediction": "16", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "69": { + "question_id": "69", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big matte balls. Subtract all green rubber objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "71": { + "question_id": "71", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "18", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "73": { + "question_id": "73", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the matrix.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 654, + "img_width": 387, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "75": { + "question_id": "75", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "77": { + "question_id": "77", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year showed the largest difference in the data points between the two lines", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "79": { + "question_id": "79", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If \u2220ABC = 60.0, then the degree of \u2220CDE is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 104, + "img_width": 123, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "81": { + "question_id": "81", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of r at theta=3*pi/2?", + "choices": null, + "answer": "-1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 460, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "83": { + "question_id": "83", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shiny buss less than the number of matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "85": { + "question_id": "85", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many countries have people working for more than 35 hours over the years?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "87": { + "question_id": "87", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $790, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "89": { + "question_id": "89", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 384, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "91": { + "question_id": "91", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown suvs less than the number of brown rubber school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "93": { + "question_id": "93", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What's the computing and wirless total for semiconductor demand in 2014?", + "choices": null, + "answer": "197.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "95": { + "question_id": "95", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects \u2220AOE, \u2220BOC = 50.0, then \u2220EOB = ()\nChoices:\n(A) 50\u00b0\n(B) 60\u00b0\n(C) 70\u00b0\n(D) 80\u00b0", + "choices": [ + "50\u00b0", + "60\u00b0", + "70\u00b0", + "80\u00b0" + ], + "answer": "80\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 162, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "97": { + "question_id": "97", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 9?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "99": { + "question_id": "99", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cat is larger?\nChoices:\n(A) white five\n(B) white three\n(C) white four\n(D) white one\n(E) white two", + "choices": [ + "white five", + "white three", + "white four", + "white one", + "white two" + ], + "answer": "white one", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "white five", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "101": { + "question_id": "101", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which shape is most erect?\nChoices:\n(A) Lanceolate\n(B) Heart-shaped\n(C) Linear\n(D) Spatulate", + "choices": [ + "Lanceolate", + "Heart-shaped", + "Linear", + "Spatulate" + ], + "answer": "Linear", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lanceolate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1204, + "img_width": 376, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "103": { + "question_id": "103", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple matte blocks. Subtract all blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "105": { + "question_id": "105", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 727, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "107": { + "question_id": "107", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "109": { + "question_id": "109", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny balls. Subtract all green metallic things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "111": { + "question_id": "111", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big gray matte things. Subtract all small metallic cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "113": { + "question_id": "113", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many baseballs are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 458, + "img_width": 721, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "115": { + "question_id": "115", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1079, + "img_width": 826, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "117": { + "question_id": "117", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the range of this function?\nChoices:\n(A) [0, 2]\n(B) [3, 2]\n(C) [2, 4]\n(D) [-3, 4]", + "choices": [ + "[0, 2]", + "[3, 2]", + "[2, 4]", + "[-3, 4]" + ], + "answer": "[0, 2]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "[0, 2]", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 460, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "119": { + "question_id": "119", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, P is a point outside \u2299O, PA and PB intersect \u2299O at two points C and D respectively. It is known that the central angles of \u2040AB and \u2040CD are 90.0 and 50.0 respectively, then \u2220P = ()\nChoices:\n(A) 45\u00b0\n(B) 40\u00b0\n(C) 25\u00b0\n(D) 20\u00b0", + "choices": [ + "45\u00b0", + "40\u00b0", + "25\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 103, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "121": { + "question_id": "121", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In trying to calculate how much money could be saved by packing lunch, Manny recorded the amount he spent on lunch each day. According to the table, what was the rate of change between Wednesday and Thursday? (Unit: $, per day)", + "choices": null, + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "123": { + "question_id": "123", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram represents successive rotations, starting from the top down. Which shape comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 579, + "img_width": 412, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "125": { + "question_id": "125", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if caterpillars decrease?\nChoices:\n(A) plants decrease\n(B) plants increase\n(C) nothing happens\n(D) none of the above", + "choices": [ + "plants decrease", + "plants increase", + "nothing happens", + "none of the above" + ], + "answer": "plants increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plants decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "127": { + "question_id": "127", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "129": { + "question_id": "129", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "131": { + "question_id": "131", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have value below 40?", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "133": { + "question_id": "133", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the merchandise exports greater than 0.92 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1268, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "135": { + "question_id": "135", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of buss that are in front of the big yellow aeroplane less than the number of matte bicycles that are on the right side of the tiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "137": { + "question_id": "137", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) injective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 258, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "139": { + "question_id": "139", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Indigo have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 543, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "141": { + "question_id": "141", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()\nChoices:\n(A) 3.85\u7c73\n(B) 4.00\u7c73\n(C) 4.40\u7c73\n(D) 4.50\u7c73", + "choices": [ + "3.85\u7c73", + "4.00\u7c73", + "4.40\u7c73", + "4.50\u7c73" + ], + "answer": "4.40\u7c73", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.85\u7c73", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 78, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "143": { + "question_id": "143", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, CE bisects \u2220BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 6", + "choices": [ + "1", + "2", + "3", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 204, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "145": { + "question_id": "145", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Can you find the missing term?", + "choices": null, + "answer": "10", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "147": { + "question_id": "147", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample B\n(C) sample A", + "choices": [ + "neither; the samples have the same temperature", + "sample B", + "sample A" + ], + "answer": "sample B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 563, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "149": { + "question_id": "149", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u22201\uff1d50\u00b0\uff0c\u22202\uff1d75\u00b0\uff0c\u5219\u22203\uff1d\uff08\uff09\nChoices:\n(A) 55\u00b0\n(B) 60\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "55\u00b0", + "60\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 93, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "151": { + "question_id": "151", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the function reach its local maximum?\nChoices:\n(A) (u1, u2) = (0, 0)\n(B) (u1, u2) = (1, 0)\n(C) (u1, u2) = (0, 1)\n(D) (u1, u2) = (1, 1)", + "choices": [ + "(u1, u2) = (0, 0)", + "(u1, u2) = (1, 0)", + "(u1, u2) = (0, 1)", + "(u1, u2) = (1, 1)" + ], + "answer": "(u1, u2) = (0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(u1, u2) = (0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 458, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "153": { + "question_id": "153", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be impacted by an increase in owls?\nChoices:\n(A) sun\n(B) grasshoppers\n(C) grass\n(D) mice", + "choices": [ + "sun", + "grasshoppers", + "grass", + "mice" + ], + "answer": "mice", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "sun", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "155": { + "question_id": "155", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Green have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 601, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "157": { + "question_id": "157", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9335", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "159": { + "question_id": "159", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "100", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1000, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "161": { + "question_id": "161", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the two numbers visible in the picture?", + "choices": null, + "answer": "71", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "163": { + "question_id": "163", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "7519", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "165": { + "question_id": "165", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan rubber cylinders. Subtract all tiny shiny cubes. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "167": { + "question_id": "167", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the biggest zero of this function?", + "choices": null, + "answer": "2", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "169": { + "question_id": "169", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1049, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "171": { + "question_id": "171", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many cinnamon rolls are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 190, + "img_width": 467, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "173": { + "question_id": "173", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small rubber buss behind the big green road bike less than the number of suvs that are behind the large brown matte truck?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "175": { + "question_id": "175", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm liver for all the datasets?", + "choices": null, + "answer": "24", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "177": { + "question_id": "177", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown tandem bikes that are to the left of the small blue matte car greater than the number of tiny blue biplanes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "179": { + "question_id": "179", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u5df2\u77e5AC\uff1d4cm\uff0c\u82e5\u25b3ACD\u7684\u5468\u957f\u4e3a14cm\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 14cm\n(B) 28cm\n(C) 10cm\n(D) 20cm", + "choices": [ + "14cm", + "28cm", + "10cm", + "20cm" + ], + "answer": "20cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 157, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "181": { + "question_id": "181", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is correct?\nChoices:\n(A) A\n(B) B\n(C) C", + "choices": [ + "A", + "B", + "C" + ], + "answer": "C", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 332, + "img_width": 864, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "183": { + "question_id": "183", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown cubes. Subtract all gray cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "185": { + "question_id": "185", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An image has the gray level PDF $p_r(r)$ shown in Fig. Q1a. One wants to do histogram specification SO that the processed image will have the specified $p_z(z)$ shown in Fig. Q1b. Can we use intensity mapping function $T: z=1-r$ to achieve the goal?\nChoices:\n(A) True\n(B) False", + "choices": [ + "True", + "False" + ], + "answer": "False", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "True", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 376, + "img_width": 724, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "187": { + "question_id": "187", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9015", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "189": { + "question_id": "189", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "191": { + "question_id": "191", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the volume of the air carriers in Ethiopia greater than the average volume of the air carriers in Ethiopia taken over all years ?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1116, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "193": { + "question_id": "193", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "195": { + "question_id": "195", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cC\uff0cD\u4e24\u70b9\u5728\u2299O\u4e0a\uff0c\u2220BCD\uff1d25\u00b0\uff0c\u5219\u2220AOD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 120\u00b0\n(B) 125\u00b0\n(C) 130\u00b0\n(D) 135\u00b0", + "choices": [ + "120\u00b0", + "125\u00b0", + "130\u00b0", + "135\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "120\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "197": { + "question_id": "197", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many sequences have negative Influence Scores?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 772, + "img_width": 1766, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "199": { + "question_id": "199", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure 23-42 is a section of a conducting rod of radius $R_1=1.30 \\mathrm{~mm}$ and length $L=$ $11.00 \\mathrm{~m}$ inside a thin-walled coaxial conducting cylindrical shell of radius $R_2=10.0 R_1$ and the (same) length $L$. The net charge on the rod is $Q_1=+3.40 \\times 10^{-12} \\mathrm{C}$; that on the shell is $Q_2=-2.00 Q_1$. What is the magnitude $E$ of the electric field at radial distance $r=2.00 R_2$?", + "choices": null, + "answer": "0.21", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 303, + "img_width": 262, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "201": { + "question_id": "201", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the border group?", + "choices": null, + "answer": "19", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "203": { + "question_id": "203", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57285\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u90fd\u662f1\uff0c\u25b3ABC\u7684\u9876\u70b9\u90fd\u5728\u8fd9\u4e9b\u5c0f\u6b63\u65b9\u5f62\u7684\u9876\u70b9\u4e0a\uff0c\u5219tan\u2220BAC\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8", + "choices": [ + "\\frac{4}{3}", + "0.75", + "0.6", + "0.8" + ], + "answer": "\\frac{4}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 151, + "img_width": 172, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "205": { + "question_id": "205", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A statistician analyzed the number of runs scored by players last season. How many players scored more than 2 runs last season?'", + "choices": null, + "answer": "24", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "207": { + "question_id": "207", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms magic and secure?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "209": { + "question_id": "209", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the highest value in black line chart ?", + "choices": null, + "answer": "28.3", + "extraction": "2.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "211": { + "question_id": "211", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 2?", + "choices": null, + "answer": "6", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "213": { + "question_id": "213", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year there was lowest per capita real gross domestic product of ohio?", + "choices": null, + "answer": "2001", + "extraction": "2009", + "prediction": "2009", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "215": { + "question_id": "215", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Layla went on a camping trip and logged the number of miles she hiked each day. What is the range of the numbers?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 249, + "img_width": 212, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "217": { + "question_id": "217", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 202, + "img_width": 304, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "219": { + "question_id": "219", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "221": { + "question_id": "221", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, C are three points on \u2299O, \u2220ACB = 25.0, then the degree of \u2220BAO is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "223": { + "question_id": "223", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an even function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "225": { + "question_id": "225", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Fig. Q4 shows the contour of an object. Represent it with an 8-directional chain code. The resultant chain code should be normalized with respect to the starting point of the chain code. Represent the answer as a list with each digit as a element.", + "choices": null, + "answer": "[0, 2, 0, 2, 1, 7, 1, 2, 0, 3, 0, 6]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 560, + "img_width": 846, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "227": { + "question_id": "227", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 580, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "229": { + "question_id": "229", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest lysine level given?\nChoices:\n(A) 0.33%\n(B) 0.31%\n(C) 0.29%\n(D) 0.32%\n(E) 0.30%", + "choices": [ + "0.33%", + "0.31%", + "0.29%", + "0.32%", + "0.30%" + ], + "answer": "0.30%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.33%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2185, + "img_width": 1683, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "231": { + "question_id": "231", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the overall best ImageNet 10shot Accuracy score across different training steps?\nChoices:\n(A) Identity\n(B) Uniform\n(C) Uniform / Soft\n(D) Soft / Uniform\n(E) Soft\n(F) Dense", + "choices": [ + "Identity", + "Uniform", + "Uniform / Soft", + "Soft / Uniform", + "Soft", + "Dense" + ], + "answer": "Soft", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Identity", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 988, + "img_width": 2002, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "233": { + "question_id": "233", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "235": { + "question_id": "235", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the epigraph of a function f an infinite set?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 266, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "237": { + "question_id": "237", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Red squirrel and deer mouse population were to decrease, what would happen to the deer tick population?\nChoices:\n(A) increase\n(B) fluctuate\n(C) it would decrease\n(D) stay the same", + "choices": [ + "increase", + "fluctuate", + "it would decrease", + "stay the same " + ], + "answer": "it would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 346, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "239": { + "question_id": "239", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 769, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "241": { + "question_id": "241", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the table have a model size larger than 10B?", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1184, + "img_width": 1570, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "243": { + "question_id": "243", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0686\u0646\u062f \u0639\u062f\u062f \u0634\u06cc\u0631\u06cc\u0646\u06cc \u0645\u062b\u0644\u062b\u06cc \u0634\u06a9\u0644 \u062f\u0631 \u062c\u0639\u0628\u0647 \u0627\u0633\u062a\u061f", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1001, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "245": { + "question_id": "245", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Damon need to buy a grilled steak and a mushroom pizza? (Unit: $)", + "choices": null, + "answer": "24", + "extraction": "22", + "prediction": "22", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 259, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "247": { + "question_id": "247", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: A spaceship of mass $m=4.50 \\times 10^3 \\mathrm{~kg}$ is in a circular Earth orbit of radius $r=8.00 \\times 10^6 \\mathrm{~m}$ and period $T_0=118.6 \\mathrm{~min}=$ $7.119 \\times 10^3 \\mathrm{~s}$ when a thruster is fired in the forward direction to decrease the speed to $96.0 \\%$ of the original speed. What is the period $T$ of the resulting elliptical orbit (Figure)?", + "choices": null, + "answer": "6.36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 906, + "img_width": 914, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "249": { + "question_id": "249", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green rubber cubes. Subtract all red matte blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "251": { + "question_id": "251", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green balls. Subtract all shiny things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "253": { + "question_id": "253", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "255": { + "question_id": "255", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2220BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of \u2220PAQ is ()\nChoices:\n(A) 70\u00b0\n(B) 55\u00b0\n(C) 40\u00b0\n(D) 30\u00b0", + "choices": [ + "70\u00b0", + "55\u00b0", + "40\u00b0", + "30\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "70\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 188, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "257": { + "question_id": "257", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u4ee5\u76f4\u89d2\u4e09\u89d2\u5f62\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u4e2d\u4e24\u4e2a\u6b63\u65b9\u5f62\u7684\u9762\u79ef\u5982\u56fe\u6240\u793a\uff0c\u5219\u6b63\u65b9\u5f62A\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 6\n(B) 36\n(C) 64\n(D) 8", + "choices": [ + "6", + "36", + "64", + "8" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "259": { + "question_id": "259", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow metal blocks. Subtract all gray metallic cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "261": { + "question_id": "261", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 345, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "263": { + "question_id": "263", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "38", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 113, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "265": { + "question_id": "265", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Justine's P.E. class participated in a push-up competition, and Justine wrote down how many push-ups each person could do. How many people did at least 60 push-ups? (Unit: people)", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 329, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "267": { + "question_id": "267", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape of a leaf is similar to Serrate, but has smaller, evenly-spaced teeth?\nChoices:\n(A) Undulate\n(B) Sinuate\n(C) Serrulate\n(D) Entire", + "choices": [ + "Undulate", + "Sinuate", + "Serrulate", + "Entire" + ], + "answer": "Serrulate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Undulate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 306, + "img_width": 529, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "269": { + "question_id": "269", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()\nChoices:\n(A) 160m\n(B) 160\u221a{3}m\n(C) (160-160\u221a{3})m\n(D) 360m", + "choices": [ + "160m", + "160\u221a{3}m", + "(160-160\u221a{3})m", + "360m" + ], + "answer": "160\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "160m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 159, + "img_width": 133, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "271": { + "question_id": "271", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y\nChoices:\n(A) 3\n(B) 4.5\n(C) 5\n(D) 6", + "choices": [ + "3", + "4.5", + "5", + "6" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 448, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "273": { + "question_id": "273", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One diagonal of a rhombus is twice as long as the other diagonal. If the area of the rhombus is 169 square millimeters, what are the lengths of the diagonals?\nChoices:\n(A) 6.5\n(B) 13\n(C) 26\n(D) 52", + "choices": [ + "6.5", + "13", + "26", + "52" + ], + "answer": "26", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "275": { + "question_id": "275", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, AD \u22a5 BC at D, DE \u22a5 AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()\nChoices:\n(A) \\frac{21}{2}\n(B) \\frac{\u221a{15}}{2}\n(C) \\frac{9}{2}\n(D) \\frac{3\u221a{5}}{2}", + "choices": [ + "\\frac{21}{2}", + "\\frac{\u221a{15}}{2}", + "\\frac{9}{2}", + "\\frac{3\u221a{5}}{2}" + ], + "answer": "\\frac{3\u221a{5}}{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{21}{2}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 185, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "277": { + "question_id": "277", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cube is identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 591, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "279": { + "question_id": "279", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be directly affected by a decrease in sunlight?\nChoices:\n(A) grass\n(B) mouse\n(C) grasshopper\n(D) owl", + "choices": [ + "grass", + "mouse", + "grasshopper", + "owl" + ], + "answer": "grass", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grass", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "281": { + "question_id": "281", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Was this a square pizza?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "283": { + "question_id": "283", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{WTY} \\cong \\overline{TWY}$. Find $x$.\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 10", + "choices": [ + "2", + "4", + "5", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 416, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "285": { + "question_id": "285", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that AB is the diameter of \u2299O, if the degree of \u2220BOC is 50.0, then the degree of \u2220A is ()\nChoices:\n(A) 50\u00b0\n(B) 40\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "50\u00b0", + "40\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "287": { + "question_id": "287", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is larger? R1 or R2?\nA. R1\nB. R2\nChoices:\n(A) R1\n(B) R2\n(C) R5\n(D) R3\n(E) R4", + "choices": [ + "R1", + "R2", + "R5", + "R3", + "R4" + ], + "answer": "R2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "R1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 370, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "289": { + "question_id": "289", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "291": { + "question_id": "291", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which period the number of full time employees is the maximum?\nChoices:\n(A) Jul '21\n(B) Jun '21\n(C) Mar '21\n(D) May '21\n(E) Apr '21", + "choices": [ + "Jul '21", + "Jun '21", + "Mar '21", + "May '21", + "Apr '21" + ], + "answer": "May '21", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Jul '21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "293": { + "question_id": "293", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, grasshopper population increase if\nChoices:\n(A) grouse decrease\n(B) chipmunk increases\n(C) grasses increases\n(D) elk increase", + "choices": [ + "grouse decrease", + "chipmunk increases", + "grasses increases", + "elk increase" + ], + "answer": "grasses increases", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grouse decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 156, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "295": { + "question_id": "295", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "297": { + "question_id": "297", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green buss greater than the number of blue school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "299": { + "question_id": "299", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1067, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "301": { + "question_id": "301", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model performs the best overall across the three stages in terms of Messenger training performance?\nChoices:\n(A) Dynalang\n(B) EMMA\n(C) R2D2\n(D) IMPALA", + "choices": [ + "Dynalang", + "EMMA", + "R2D2", + "IMPALA" + ], + "answer": "Dynalang", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Dynalang", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 524, + "img_width": 2012, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "303": { + "question_id": "303", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Dim Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 797, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "305": { + "question_id": "305", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "307": { + "question_id": "307", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure is an overhead view of the path taken by a race car driver as his car collides with the racetrack wall. Just before the collision, he is traveling at speed $v_i=70 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $30^{\\circ}$ from the wall. Just after the collision, he is traveling at speed $v_f=50 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $10^{\\circ}$ from the wall. His mass $m$ is $80 \\mathrm{~kg}$. The collision lasts for $14 \\mathrm{~ms}$. What is the magnitude of the average force on the driver during the collision?", + "choices": null, + "answer": "2.58", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 466, + "img_width": 772, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "309": { + "question_id": "309", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The movie critic liked to count the number of actors in each movie he saw. How many movies had at least 30 actors but fewer than 47 actors? (Unit: movies)", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "311": { + "question_id": "311", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1947, + "img_width": 1620, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "313": { + "question_id": "313", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 334, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "315": { + "question_id": "315", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, angle A is congruent to angle BED, and angle C is congruent to angle D. If the ratio of the length of AB to the length of EB is 5:1, and the area of the triangle BED is 5*a^2 + 10, what is the area of triangle ABC?\nChoices:\n(A) 5*a^2 + 10\n(B) 25*a^2 + 50\n(C) 25*a^2 + 100\n(D) 125*a^2 + 250\n(E) cannot be determined", + "choices": [ + "5*a^2 + 10", + "25*a^2 + 50", + "25*a^2 + 100", + "125*a^2 + 250", + "cannot be determined" + ], + "answer": "125*a^2 + 250", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5*a^2 + 10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 463, + "img_width": 749, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "317": { + "question_id": "317", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 361, + "img_width": 496, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "319": { + "question_id": "319", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Would most of the ground cover be considered weeds?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "321": { + "question_id": "321", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $330, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "323": { + "question_id": "323", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Craig just downloaded the new game Gem Excavator on his phone. In the first level, Craig gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Craig's points. Which color gem affects Craig's points less?'\nChoices:\n(A) green\n(B) red", + "choices": [ + "green", + "red" + ], + "answer": "green", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 94, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "325": { + "question_id": "325", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "327": { + "question_id": "327", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 1 units in at least one store?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "329": { + "question_id": "329", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of y at x=6 is ____ that at x=8\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "larger than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "331": { + "question_id": "331", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Several people compared how many Web pages they had visited. What is the mean of the numbers?'", + "choices": null, + "answer": "64", + "extraction": "65", + "prediction": "65", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 246, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "333": { + "question_id": "333", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find tan X\nChoices:\n(A) \\frac { 5 } { 12 }\n(B) \\frac { 12 } { 13 }\n(C) \\frac { 17 } { 12 }\n(D) \\frac { 12 } { 5 }", + "choices": [ + "\\frac { 5 } { 12 }", + "\\frac { 12 } { 13 }", + "\\frac { 17 } { 12 }", + "\\frac { 12 } { 5 }" + ], + "answer": "\\frac { 5 } { 12 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 5 } { 12 }", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 297, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "335": { + "question_id": "335", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large brown matte balls. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "337": { + "question_id": "337", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "339": { + "question_id": "339", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u2299O\u4e2d\uff0cAB=AC\uff0c\u2220BAC\uff1d70\u00b0\uff0c\u5219\u2220AEC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 75\u00b0\n(C) 50\u00b0\n(D) 55\u00b0", + "choices": [ + "65\u00b0", + "75\u00b0", + "50\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 115, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "341": { + "question_id": "341", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is six (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "343": { + "question_id": "343", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple metallic spheres. Subtract all small purple things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "345": { + "question_id": "345", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many kites are there?", + "choices": null, + "answer": "25", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 429, + "img_width": 711, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "347": { + "question_id": "347", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green metallic double buss less than the number of big purple rubber cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "349": { + "question_id": "349", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which capability boasts the highest proportion (%)?\nChoices:\n(A) Rec\n(B) OCR\n(C) Know\n(D) Gen\n(E) Spat\n(F) Math", + "choices": [ + "Rec", + "OCR", + "Know", + "Gen", + "Spat", + "Math" + ], + "answer": "Rec", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rec", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 1348, + "img_width": 1704, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "351": { + "question_id": "351", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer purple rubber objects that are to the left of the red object than tiny matte bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "353": { + "question_id": "353", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: At time $t=0$ a tank contains $Q_0 \\mathrm{lb}$ of salt dissolved in 100 gal of water; see Figure 2.3.1. Assume that water containing $\\frac{1}{4} \\mathrm{lb}$ of salt/gal is entering the tank at a rate of $r \\mathrm{gal} / \\mathrm{min}$ and that the well-stirred mixture is draining from the tank at the same rate. Set up the initial value problem that describes this flow process. By finding the amount of salt $Q(t)$ in the tank at any time, and the limiting amount $Q_L$ that is present after a very long time, if $r=3$ and $Q_0=2 Q_L$, find the time $T$ after which the salt level is within $2 \\%$ of $Q_L$.", + "choices": null, + "answer": "130.4", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 938, + "img_width": 996, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "355": { + "question_id": "355", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If \u22201 = 50.0, then the degree of \u22202 is ()\nChoices:\n(A) 150\u00b0\n(B) 130\u00b0\n(C) 110\u00b0\n(D) 100\u00b0", + "choices": [ + "150\u00b0", + "130\u00b0", + "110\u00b0", + "100\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "150\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "357": { + "question_id": "357", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "359": { + "question_id": "359", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kylie spent a week at the beach and recorded the number of shells she found each day. According to the table, what was the rate of change between Thursday and Friday? (Unit: shells per day)", + "choices": null, + "answer": "-7", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "361": { + "question_id": "361", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mold are the cylindrical ports located? \nChoices:\n(A) Upper half\n(B) Lower half\n(C) Medial half\n(D) Lateral half", + "choices": [ + "Upper half", + "Lower half", + "Medial half", + "Lateral half" + ], + "answer": "Lower half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Upper half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 435, + "img_width": 596, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "PMC-VQA", + "split": "testmini", + "task": "visual question answering" + }, + "363": { + "question_id": "363", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny gray metal blocks. Subtract all purple things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "365": { + "question_id": "365", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big yellow metallic spheres. Subtract all tiny metal things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "7", + "prediction": "7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "367": { + "question_id": "367", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "14", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 429, + "img_width": 873, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "369": { + "question_id": "369", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) surjective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 266, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "371": { + "question_id": "371", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ABC\uff1d90\u00b0\uff0c\u70b9D\u3001E\u3001F\u5206\u522b\u662f\u8fb9AB\u3001BC\u3001CA\u7684\u4e2d\u70b9\uff0c\u82e5DE+BF\uff1d8\uff0c\u5219BF\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "373": { + "question_id": "373", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the quadrilateral ABCD, \u2220BAD = 120.0, \u2220B = \u2220D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of \u25b3AMN is the smallest, then the degree of \u2220AMN + \u2220ANM is ()\nChoices:\n(A) 110\u00b0\n(B) 120\u00b0\n(C) 140\u00b0\n(D) 150\u00b0", + "choices": [ + "110\u00b0", + "120\u00b0", + "140\u00b0", + "150\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "110\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 122, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "375": { + "question_id": "375", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the length of $AC$ in the isosceles triangle ABC. \nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5", + "choices": [ + "1.5", + "7", + "11", + "12.5" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 293, + "img_width": 703, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "377": { + "question_id": "377", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 649, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "379": { + "question_id": "379", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown what will most directly be affected by the loss of the trees?\nChoices:\n(A) horses\n(B) cats\n(C) nothing\n(D) bears", + "choices": [ + "horses", + "cats", + "nothing", + "bears" + ], + "answer": "horses", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "horses", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 400, + "img_width": 570, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "381": { + "question_id": "381", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny cyan matte articulated buss left of the big school bus than small yellow matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "383": { + "question_id": "383", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What value you get , if you divide the largest bar value by 2 ?", + "choices": null, + "answer": "131253.5", + "extraction": "12.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "385": { + "question_id": "385", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 771, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "387": { + "question_id": "387", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Of the four balls in the photo, what is the percentage of them on the ground?", + "choices": null, + "answer": "100", + "extraction": "75", + "prediction": "75", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 485, + "img_width": 363, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "389": { + "question_id": "389", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $320, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "shortage", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "391": { + "question_id": "391", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, point O is the center of \u2299O, points A, B, and C are on \u2299O, AO \u2225 BC, \u2220AOB = 40.0, then the degree of \u2220OAC is equal to ()\nChoices:\n(A) 40\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "60\u00b0", + "50\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 96, + "img_width": 96, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "393": { + "question_id": "393", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest dark blue bar?", + "choices": null, + "answer": "54", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "395": { + "question_id": "395", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average age of the people in this picture?", + "choices": null, + "answer": "10", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "397": { + "question_id": "397", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001B\u3001C\u90fd\u5728\u534a\u5f84\u4e3a2\u7684\u2299O\u4e0a\uff0c\u2220C\uff1d30\u00b0\uff0c\u5219\u5f26AB\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.2\n(D) 2.5", + "choices": [ + "1", + "2", + "2.2", + "2.5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 70, + "img_width": 73, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "399": { + "question_id": "399", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "401": { + "question_id": "401", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "403": { + "question_id": "403", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find TX if $E X=24$ and $D E=7$\nChoices:\n(A) 7\n(B) 24\n(C) 25\n(D) 32", + "choices": [ + "7", + "24", + "25", + "32" + ], + "answer": "32", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 221, + "img_width": 564, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "405": { + "question_id": "405", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "19", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1351, + "img_width": 1801, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "407": { + "question_id": "407", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9B\uff0cD\uff0cE\uff0cC\u5728\u540c\u4e00\u6761\u76f4\u7ebf\u4e0a\uff0c\u82e5\u25b3ABD\u224c\u25b3ACE\uff0c\u2220AEC\uff1d110\u00b0\uff0c\u5219\u2220DAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 67, + "img_width": 76, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "409": { + "question_id": "409", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the radius of this circle?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 358, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "411": { + "question_id": "411", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average percentage of population having access to electricity per year?", + "choices": null, + "answer": "100", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1081, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "413": { + "question_id": "413", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5df2\u77e5\uff1a\u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAB\uff1dAC\uff0cBD\u4e3a\u2220ABC\u7684\u5e73\u5206\u7ebf\uff0c\u2220BDC\uff1d75\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 123, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "415": { + "question_id": "415", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average annual wage in Slovak Republic in the year 2019", + "choices": null, + "answer": "15017", + "extraction": "12000", + "prediction": "12000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "417": { + "question_id": "417", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 748, + "img_width": 564, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "419": { + "question_id": "419", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "421": { + "question_id": "421", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An elevator cab of mass $m=500 \\mathrm{~kg}$ is descending with speed $v_i=4.0 \\mathrm{~m} / \\mathrm{s}$ when its supporting cable begins to slip, allowing it to fall with constant acceleration $\\vec{a}=\\vec{g} / 5$.\r\nDuring the $12 \\mathrm{~m}$ fall, what is the work $W_T$ done on the cab by the upward pull $\\vec{T}$ of the elevator cable?", + "choices": null, + "answer": "-47", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1190, + "img_width": 550, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "423": { + "question_id": "423", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink less than Dark Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 577, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "425": { + "question_id": "425", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5728Rt\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5AC\uff1d6\uff0cBC\uff1d8\uff0c\u5219cosA\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 0.6\n(B) 0.8\n(C) 0.75\n(D) \\frac{4}{3}", + "choices": [ + "0.6", + "0.8", + "0.75", + "\\frac{4}{3}" + ], + "answer": "0.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 171, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "427": { + "question_id": "427", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "429": { + "question_id": "429", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "431": { + "question_id": "431", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what would happen to dragonfly if all mayfly dies\nChoices:\n(A) remains the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remains the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 297, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "433": { + "question_id": "433", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 350, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "435": { + "question_id": "435", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of employed females who are not attending school greater than the average percentage of employed females who are not attending school taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 955, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "437": { + "question_id": "437", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fig.Q3 shows an excerpt of the transmission phase of a TCP connection. Assume the length of the IP header is 20 bytes. What is the ACK number at message 6?", + "choices": null, + "answer": "839", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 814, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "439": { + "question_id": "439", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is this function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 256, + "img_width": 539, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "441": { + "question_id": "441", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "443": { + "question_id": "443", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure: In Rt\u25b3ABC, \u2220C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}", + "choices": [ + "\\frac{3}{5}", + "\\frac{4}{5}", + "\\frac{3}{4}", + "\\frac{4}{3}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{3}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 80, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "445": { + "question_id": "445", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate less than Saddle Brown?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 436, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "447": { + "question_id": "447", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Midnight Blue intersect Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 685, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "449": { + "question_id": "449", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do the average motorcycle get on the highway?", + "choices": null, + "answer": "40", + "extraction": "50", + "prediction": "50", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "451": { + "question_id": "451", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow metallic choppers that are behind the large cyan thing less than the number of brown metal double buss that are behind the small yellow shiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "453": { + "question_id": "453", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 116, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "455": { + "question_id": "455", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If x = 32 and r = 18, what is the length of the arc shown in the figure above?\nChoices:\n(A) 16*\\pi/5\n(B) 32*\\pi/5\n(C) 36*\\pi\n(D) 288*\\pi/5\n(E) 576*\\pi", + "choices": [ + "16*\\pi/5", + "32*\\pi/5", + "36*\\pi", + "288*\\pi/5", + "576*\\pi" + ], + "answer": "16*\\pi/5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16*\\pi/5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 575, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "457": { + "question_id": "457", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "4525", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 605, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "459": { + "question_id": "459", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large cyan matte balls. Subtract all tiny shiny objects. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "461": { + "question_id": "461", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A perceptual audio codec is used to compress an audio signal. The codec groups every 4 barks into a subband and then allocates bits to different subbands according to the result of a spectrum analysis based on a psychoacoustic model. All samples in the same subband are quantized with the same quantizer, and the bit resolution of which is allocated by the codec. (The Bark scale is a psychoacoustical scale proposed by Eberhard Zwicker in 1961.) Fig. Q1a shows the frequency spectrum of a windowed segment of audio signal. The psychoacoustic model shown in Fig. Q1b is used in the audio codec to derive the masking threshold for the audio segment. How many potential maskers in Fig. Q1a?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 488, + "img_width": 908, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "463": { + "question_id": "463", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray things. Subtract all small brown metallic balls. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "465": { + "question_id": "465", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 628, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "467": { + "question_id": "467", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The degree measures of minor arc $\\widehat{A C}$ and major arc $\\widehat{A D C}$ are $x$ and $y$ respectively. If $m\u2220ABC = 70\u00b0$, find $x$.\nChoices:\n(A) 90\n(B) 100\n(C) 110\n(D) 120", + "choices": [ + "90", + "100", + "110", + "120" + ], + "answer": "110", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "90", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 235, + "img_width": 499, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "469": { + "question_id": "469", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Chartreuse?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "471": { + "question_id": "471", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Lily and her friends recorded their scores while playing a board game. Which score did the greatest number of people receive?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "473": { + "question_id": "473", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2604, + "img_width": 2500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "475": { + "question_id": "475", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 71, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "477": { + "question_id": "477", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "479": { + "question_id": "479", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How many times Norway data bigger than Italy data ?", + "choices": null, + "answer": "2.54", + "extraction": "1.75", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "481": { + "question_id": "481", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 404, + "img_width": 592, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "483": { + "question_id": "483", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is on \u2299O, AE is the tangent of \u2299O, A is the tangent point, connect BC and extend to intersect AE at point D. If \u2220AOC = 80.0, then the degree of \u2220ADB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "20\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 165, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "485": { + "question_id": "485", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9D\u5728\u7b49\u8fb9\u25b3ABC\u7684\u8fb9CB\u7684\u5ef6\u957f\u7ebf\u4e0a\uff0c\u70b9E\u5728\u7ebf\u6bb5BC\u4e0a\uff0c\u8fde\u63a5AD\uff0cAE\uff0c\u82e5DA\uff1dDE\uff0c\u4e14\u2220DAB\uff1d20\u00b0\uff0c\u90a3\u4e48\u2220EAC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 15\u00b0\n(C) 10\u00b0\n(D) 5\u00b0", + "choices": [ + "20\u00b0", + "15\u00b0", + "10\u00b0", + "5\u00b0" + ], + "answer": "10\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 235, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "487": { + "question_id": "487", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big cars behind the small brown shiny mountain bike than tiny objects on the right side of the bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "489": { + "question_id": "489", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For trapezoid ABCD shown above, AB = 24, AD = 23, and BC = 16. What is the length of segment CD?", + "choices": null, + "answer": "25", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 297, + "img_width": 426, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "491": { + "question_id": "491", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 540, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "493": { + "question_id": "493", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function differentiable at every point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 847, + "img_width": 800, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "495": { + "question_id": "495", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer green things in front of the blue metallic car than choppers right of the chopper?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "497": { + "question_id": "497", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "499": { + "question_id": "499", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Quadrilateral $ABDC$ is a rectangle. If $m\\angle1 = 38$, find $m \\angle 2$\nChoices:\n(A) 33\n(B) 38\n(C) 52\n(D) 87", + "choices": [ + "33", + "38", + "52", + "87" + ], + "answer": "52", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 323, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "501": { + "question_id": "501", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red rubber cylinders. Subtract all blue objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "503": { + "question_id": "503", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the center person? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 225, + "img_width": 338, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "505": { + "question_id": "505", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "507": { + "question_id": "507", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen if the hawk population increased?\nChoices:\n(A) mice would increase\n(B) sparrows increased\n(C) garter snakes would decrease\n(D) grass decreased", + "choices": [ + "mice would increase", + "sparrows increased", + "garter snakes would decrease", + "grass decreased" + ], + "answer": "garter snakes would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mice would increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "509": { + "question_id": "509", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 400, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "511": { + "question_id": "511", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "513": { + "question_id": "513", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest value in states that border West Virginia ?\nChoices:\n(A) 43.2%-63.6%\n(B) 45.2%-65.6%\n(C) 42.2%-62.6%\n(D) 41.2%-61.6%\n(E) 44.2%-64.6%", + "choices": [ + "43.2%-63.6%", + "45.2%-65.6%", + "42.2%-62.6%", + "41.2%-61.6%", + "44.2%-64.6%" + ], + "answer": "42.2%-62.6%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "43.2%-63.6%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "515": { + "question_id": "515", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You would potentially see a decrease in which organism if gulls disappeared?\nChoices:\n(A) herring\n(B) kril\n(C) anchovy\n(D) phytoplankton", + "choices": [ + "herring", + "kril", + "anchovy", + "phytoplankton" + ], + "answer": "kril", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "herring", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 549, + "img_width": 398, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "517": { + "question_id": "517", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: At Bloomington Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2014 and 2015? (Unit: employees per year)", + "choices": null, + "answer": "-1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 275, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "519": { + "question_id": "519", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many Triangles do you see in the picture?", + "choices": null, + "answer": "12", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 852, + "img_width": 948, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "521": { + "question_id": "521", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is a point on \u2299O, \u2220C = 20.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 20\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "20\u00b0", + "30\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 120, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "523": { + "question_id": "523", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, a teaching interest group wants to measure the height of a tree CD. They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()\nChoices:\n(A) 10m\n(B) 5m\n(C) 5\u221a{3}m\n(D) 10\u221a{3}m", + "choices": [ + "10m", + "5m", + "5\u221a{3}m", + "10\u221a{3}m" + ], + "answer": "5\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 285, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "525": { + "question_id": "525", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value shown on the X axis of first plot?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2209, + "img_width": 1711, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "527": { + "question_id": "527", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big shiny cars in front of the red airliner greater than the number of big purple road bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "529": { + "question_id": "529", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what number does the smaller arrow point to?", + "choices": null, + "answer": "1020", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "531": { + "question_id": "531", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to five.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "533": { + "question_id": "533", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small cyan cubes. Subtract all large yellow rubber cubes. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "535": { + "question_id": "535", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "-8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "537": { + "question_id": "537", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of red rubber bicycles less than the number of cyan metal school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "539": { + "question_id": "539", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u3001E\u5206\u522b\u662f\u8fb9AB\u3001BC\u7684\u4e2d\u70b9\uff0c\u82e5\u25b3BDE\u7684\u5468\u957f\u662f6\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u662f\uff08\uff09\nChoices:\n(A) 8\n(B) 10\n(C) 12\n(D) 14", + "choices": [ + "8", + "10", + "12", + "14" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 71, + "img_width": 149, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "541": { + "question_id": "541", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is not identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "D", + "prediction": "D", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 560, + "img_width": 280, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "543": { + "question_id": "543", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small purple matte cars than brown matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "545": { + "question_id": "545", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Crimson?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 764, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "547": { + "question_id": "547", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which organisms will be most directly affected by a decrease in the amount of grass?\nChoices:\n(A) Insects\n(B) Hawk and snake\n(C) Snake and raccoon\n(D) Mouse and cricket", + "choices": [ + "Insects", + "Hawk and snake", + "Snake and raccoon", + "Mouse and cricket" + ], + "answer": "Insects", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Insects", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 377, + "img_width": 630, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "549": { + "question_id": "549", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangent to \u2299O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of \u25b3PCD is ()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) \u4e0d\u80fd\u786e\u5b9a", + "choices": [ + "8", + "12", + "16", + "\u4e0d\u80fd\u786e\u5b9a" + ], + "answer": "16", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 192, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "551": { + "question_id": "551", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest tattoos in male and the least in female?", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "553": { + "question_id": "553", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet less than Chocolate?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "555": { + "question_id": "555", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this nest larger than a fist?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "557": { + "question_id": "557", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220BAC\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5206\u522b\u5411\u5916\u4f5c\u7b49\u8fb9\u4e09\u89d2\u5f62\u25b3A'BC\uff0c\u25b3AB'C\uff0c\u25b3ABC'\uff0c\u82e5\u25b3A'BC\uff0c\u25b3AB'C\u7684\u9762\u79ef\u5206\u522b\u662f10\u548c4\uff0c\u5219\u25b3ABC'\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 9", + "choices": [ + "4", + "6", + "8", + "9" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 130, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "559": { + "question_id": "559", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest number shown on the black outer part of the watch?", + "choices": null, + "answer": "55", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "561": { + "question_id": "561", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber double buss right of the small red aeroplane the same as the number of small objects that are left of the tiny gray matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "563": { + "question_id": "563", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number on the monitor is higher?\nChoices:\n(A) top\n(B) bottom\n(C) left\n(D) right", + "choices": [ + "top", + "bottom", + "left", + "right" + ], + "answer": "bottom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "565": { + "question_id": "565", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model can achieve the best ImageNet 10-shot Accuracy score?\nChoices:\n(A) Soft MoE\n(B) Experts Choice\n(C) Tokens Choice\n(D) Dense", + "choices": [ + "Soft MoE", + "Experts Choice", + "Tokens Choice", + "Dense" + ], + "answer": "Soft MoE", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Soft MoE", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 978, + "img_width": 1966, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "567": { + "question_id": "567", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the slug to the nearest inch. The slug is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 252, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "569": { + "question_id": "569", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which subject had the highest pulse rate in baseline period?", + "choices": null, + "answer": "1", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2284, + "img_width": 1786, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "571": { + "question_id": "571", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 613, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "573": { + "question_id": "573", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A race car driver kept track of how many laps he drove in the past 5 days. What is the mode of the numbers?'", + "choices": null, + "answer": "53", + "extraction": "55", + "prediction": "55", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 203, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "575": { + "question_id": "575", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines $l$, $m$, and $n$ are perpendicular bisectors of $\\triangle PQR$ and meet at $T$. If $TQ = 2x$, $PT = 3y - 1$, and $TR = 8$, find $z$.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "577": { + "question_id": "577", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Consider the following matrices:\r\n$$\r\n\\mathbf{A}=\\left(\\begin{array}{rrr}\r\n1 & 2 & -1 \\\\\r\n0 & 3 & 1 \\\\\r\n2 & 0 & 1\r\n\\end{array}\\right), \\quad \\mathbf{B}=\\left(\\begin{array}{rrr}\r\n2 & 1 & 0 \\\\\r\n0 & -1 & 2 \\\\\r\n1 & 1 & 3\r\n\\end{array}\\right), \\quad \\mathbf{C}=\\left(\\begin{array}{ll}\r\n2 & 1 \\\\\r\n4 & 3 \\\\\r\n1 & 0\r\n\\end{array}\\right)\r\n$$\r\nFind $|\\mathbf{A B}|$.", + "choices": null, + "answer": "-104", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 142, + "img_width": 533, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "579": { + "question_id": "579", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of documents required per shipment to export goods in Uganda per year?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1228, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "581": { + "question_id": "581", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large matte cubes. Subtract all matte blocks. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "583": { + "question_id": "583", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth.\r\n\nChoices:\n(A) 5.8\n(B) 6.5\n(C) 14.2\n(D) 44.3", + "choices": [ + "5.8", + "6.5", + "14.2", + "44.3" + ], + "answer": "5.8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.8", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 465, + "img_width": 319, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "585": { + "question_id": "585", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u77e9\u5f62ABCD\u4e2d\uff0cAB\uff1d2\uff0c\u2220AOB\uff1d60\u00b0\uff0c\u5219BD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "4", + "3", + "2", + "2\u221a{3}" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "587": { + "question_id": "587", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()\nChoices:\n(A) 20\u6d77\u91cc\n(B) 20\u221a{2}\u6d77\u91cc\n(C) 15\u6d77\u91cc\n(D) 20\u6d77\u91cc", + "choices": [ + "20\u6d77\u91cc", + "20\u221a{2}\u6d77\u91cc", + "15\u6d77\u91cc", + "20\u6d77\u91cc" + ], + "answer": "20\u221a{2}\u6d77\u91cc", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u6d77\u91cc", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 124, + "img_width": 144, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "589": { + "question_id": "589", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number of things are either large objects behind the shiny double bus or tiny gray metal objects?", + "choices": null, + "answer": "5", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "591": { + "question_id": "591", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 600, + "img_width": 900, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "593": { + "question_id": "593", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average of longest light blue bar and shortest gray bar?", + "choices": null, + "answer": "273", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "595": { + "question_id": "595", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "597": { + "question_id": "597", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "599": { + "question_id": "599", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AC = 6 and BC = 3. Point P lies on line AB between A and B such that line CP is perpendicular to line AB. Which of the following could be the length of line CP?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 7\n(E) 8", + "choices": [ + "2", + "4", + "5", + "7", + "8" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 340, + "img_width": 393, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "601": { + "question_id": "601", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of smallest segment and second largest segment?", + "choices": null, + "answer": "0.33", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 386, + "img_width": 210, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "603": { + "question_id": "603", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP C\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "300", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "605": { + "question_id": "605", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large green matte cubes. Subtract all big green blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "607": { + "question_id": "607", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow shiny things. Subtract all yellow metal things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "609": { + "question_id": "609", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green matte cylinders. Subtract all big brown cubes. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "611": { + "question_id": "611", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A shipping company keeps track of the number of boxes in each shipment they send out. How many shipments had exactly 56 boxes? (Unit: shipments)", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 180, + "img_width": 153, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "613": { + "question_id": "613", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many houses are there?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 87, + "img_width": 473, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "615": { + "question_id": "615", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two sides of a triangle measure 12 and 7, which of the following cannot be the perimeter of the triangle?\nChoices:\n(A) 29\n(B) 34\n(C) 37\n(D) 38", + "choices": [ + "29", + "34", + "37", + "38" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "29", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 195, + "img_width": 522, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "617": { + "question_id": "617", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the normal components of $\\mathbf{a}$.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "619": { + "question_id": "619", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(4)?", + "choices": null, + "answer": "16", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 666, + "img_width": 970, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "621": { + "question_id": "621", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The figure above is composed of 25 small triangles that are congruent and equilateral. If the area of triangle DFH is 10, what is the area of triangle AFK?\nChoices:\n(A) 40\n(B) 42.5\n(C) 50\n(D) 52.5\n(E) 62.5", + "choices": [ + "40", + "42.5", + "50", + "52.5", + "62.5" + ], + "answer": "62.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 315, + "img_width": 397, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "623": { + "question_id": "623", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is twelve (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "625": { + "question_id": "625", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of blue matte school buss greater than the number of large cyan metallic jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "627": { + "question_id": "627", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends played a trivia game and recorded their scores. What is the mode of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 311, + "img_width": 155, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "629": { + "question_id": "629", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the object hut?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "631": { + "question_id": "631", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "0", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "633": { + "question_id": "633", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m\u22201 = 123$. Find the measure of $\\angle 14$.\nChoices:\n(A) 47\n(B) 57\n(C) 67\n(D) 123", + "choices": [ + "47", + "57", + "67", + "123" + ], + "answer": "57", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "47", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 330, + "img_width": 361, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "635": { + "question_id": "635", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, E is any point in \u25b1ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 179, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "637": { + "question_id": "637", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfa\u2225b\uff0c\u76f4\u7ebfa\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u5206\u522b\u4ea4\u4e8e\u70b9E\uff0cF\uff0c\u76f4\u7ebfb\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9CB\uff0cCD\u5206\u522b\u4ea4\u4e8e\u70b9G\uff0cH\uff0e\u82e5\u2220AFE\uff1d30\u00b0\uff0c\u5219\u2220DHG\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 100\u00b0\n(B) 110\u00b0\n(C) 120\u00b0\n(D) 130\u00b0", + "choices": [ + "100\u00b0", + "110\u00b0", + "120\u00b0", + "130\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "100\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 166, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "639": { + "question_id": "639", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What does the dial indicate as the top facing number?", + "choices": null, + "answer": "475", + "extraction": "450", + "prediction": "450", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VizWiz", + "split": "testmini", + "task": "visual question answering" + }, + "641": { + "question_id": "641", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: The graph of the concentration function $c(t)$ is shown after a 7-mg injection of dye into a heart. Use Simpson's Rule to estimate the cardiac output.", + "choices": null, + "answer": "5.77", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 420, + "img_width": 828, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "643": { + "question_id": "643", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is the diameter of \u2299O, chord DE \u2225 OA, if the degree of \u2220D is 50.0, then the degree of \u2220C is ()\nChoices:\n(A) 25\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 50\u00b0", + "choices": [ + "25\u00b0", + "30\u00b0", + "40\u00b0", + "50\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 111, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "645": { + "question_id": "645", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAC\uff0cBD\u662f\u83f1\u5f62ABCD\u7684\u5bf9\u89d2\u7ebf\uff0cBH\u22a5AD\u4e8e\u70b9H\uff0c\u82e5AC\uff1d4\uff0cBD\uff1d3\uff0c\u5219BH\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 2.4\n(B) 2.5\n(C) 4.8\n(D) 5", + "choices": [ + "2.4", + "2.5", + "4.8", + "5" + ], + "answer": "2.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 139, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "647": { + "question_id": "647", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the top view.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "B", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 900, + "img_width": 600, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "649": { + "question_id": "649", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many values are below 30 in Mainly are incidents of individual misconduct?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 461, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "651": { + "question_id": "651", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an assignment, Johnny looked at which countries got the most Nobel Prizes in various decades. In the 1990s, how many more Nobel Prize winners did Canada have than Italy? (Unit: Nobel Prize winners)", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 224, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "653": { + "question_id": "653", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there at least three distinct shades of blue in this photo?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "655": { + "question_id": "655", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Russia has the highest transport?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 507, + "img_width": 858, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "657": { + "question_id": "657", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Arkansas have a higher value than Indiana ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "659": { + "question_id": "659", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value of navy blue bar?", + "choices": null, + "answer": "991", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "661": { + "question_id": "661", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1274, + "img_width": 1732, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "663": { + "question_id": "663", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "665": { + "question_id": "665", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $h$ in the triangle.\nChoices:\n(A) 4.62\n(B) 5.66\n(C) 6.93\n(D) 8", + "choices": [ + "4.62", + "5.66", + "6.93", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.62", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 275, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "667": { + "question_id": "667", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has the least difference between the used and new cars?", + "choices": null, + "answer": "2015", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "669": { + "question_id": "669", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 3.5", + "choices": [ + "2", + "2.5", + "3", + "3.5" + ], + "answer": "3.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 18, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "671": { + "question_id": "671", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the semicircle rounded to 2 decimal places?", + "choices": null, + "answer": "14.14", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "673": { + "question_id": "673", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large green cars less than the number of brown rubber double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "675": { + "question_id": "675", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()\nChoices:\n(A) 13m\n(B) 34m\n(C) (6+14\u221a{3})m\n(D) 40m", + "choices": [ + "13m", + "34m", + "(6+14\u221a{3})m", + "40m" + ], + "answer": "34m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "13m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 83, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "677": { + "question_id": "677", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of dirtbikes right of the large blue object less than the number of small green metallic cars in front of the tiny matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "679": { + "question_id": "679", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of \u25b3OAB is ()\nChoices:\n(A) 15\n(B) 17\n(C) 21\n(D) 27", + "choices": [ + "15", + "17", + "21", + "27" + ], + "answer": "17", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 73, + "img_width": 173, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "681": { + "question_id": "681", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the largest city in the nation where this plane is headquartered?\nChoices:\n(A) hong kong\n(B) osaka\n(C) shanghai\n(D) tokyo", + "choices": [ + "hong kong", + "osaka", + "shanghai", + "tokyo" + ], + "answer": "tokyo", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "hong kong", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "683": { + "question_id": "683", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 157, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "685": { + "question_id": "685", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to organism c if organism b increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't predict\n(D) stay same", + "choices": [ + "decrease", + "increase", + "can't predict", + "stay same" + ], + "answer": "increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 246, + "img_width": 574, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "687": { + "question_id": "687", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen that would increase the number of krill?\nChoices:\n(A) increase in phytoplankton\n(B) decrease in penguins\n(C) increase in fish\n(D) increase in birds", + "choices": [ + "increase in phytoplankton", + "decrease in penguins", + "increase in fish", + "increase in birds" + ], + "answer": "increase in phytoplankton", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase in phytoplankton", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 396, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "689": { + "question_id": "689", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are these people sitting in a circle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "691": { + "question_id": "691", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "256", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 500, + "img_width": 596, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "693": { + "question_id": "693", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the orange larger than the car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "695": { + "question_id": "695", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Orchid?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 734, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "697": { + "question_id": "697", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, \u2220B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()\nChoices:\n(A) 12\n(B) 18\n(C) 27\n(D) 54", + "choices": [ + "12", + "18", + "27", + "54" + ], + "answer": "27", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 205, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "699": { + "question_id": "699", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2684, + "img_width": 4577, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "701": { + "question_id": "701", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 109, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "703": { + "question_id": "703", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of highest value and lowest value of navy blue bar?", + "choices": null, + "answer": "2372.1", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "705": { + "question_id": "705", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the heart wider than more than half the width of the thorax?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 512, + "img_width": 419, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "707": { + "question_id": "707", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0ca\u2225b\uff0c\u22201\uff1d60\u00b0\uff0c\u5219\u22202\u7684\u5927\u5c0f\u662f\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 120\u00b0", + "choices": [ + "60\u00b0", + "80\u00b0", + "100\u00b0", + "120\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "709": { + "question_id": "709", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "711": { + "question_id": "711", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 270, + "img_width": 369, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "713": { + "question_id": "713", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 7", + "choices": [ + "3", + "4", + "6", + "7" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 422, + "img_width": 521, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "715": { + "question_id": "715", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "717": { + "question_id": "717", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is \\int_1^{\\infty} {1\\over x^{0.99}} dx finite according to this graph ?\n\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 314, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "719": { + "question_id": "719", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Brenda graphed the daily low temperature for 5 days. What is the range of the numbers?'", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "721": { + "question_id": "721", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many odd functions are in the graph?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 297, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "723": { + "question_id": "723", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 277, + "img_width": 468, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "725": { + "question_id": "725", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In Figure, suppose that Barbara's velocity relative to Alex is a constant $v_{B A}=52 \\mathrm{~km} / \\mathrm{h}$ and car $P$ is moving in the negative direction of the $x$ axis.\r\n(a) If Alex measures a constant $v_{P A}=-78 \\mathrm{~km} / \\mathrm{h}$ for car $P$, what velocity $v_{P B}$ will Barbara measure?", + "choices": null, + "answer": "-130", + "extraction": "-26", + "prediction": "-26", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 690, + "img_width": 976, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "727": { + "question_id": "727", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the largest and the smallest value in the chart?", + "choices": null, + "answer": "70", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "729": { + "question_id": "729", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "731": { + "question_id": "731", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The train conductor made sure to count the number of passengers on each train. What is the smallest number of passengers? (Unit: passengers)", + "choices": null, + "answer": "40", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 159, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "733": { + "question_id": "733", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Square ABCD. CT: tangent to semicircle. Find the angle \u2220CTD. Return the numeric value.", + "choices": null, + "answer": "63.4", + "extraction": "45.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1018, + "img_width": 972, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "735": { + "question_id": "735", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan things in front of the cyan rubber suv less than the number of big suvs that are behind the red bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "737": { + "question_id": "737", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 32\n(B) 39\n(C) 46\n(D) 78", + "choices": [ + "32", + "39", + "46", + "78" + ], + "answer": "78", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "739": { + "question_id": "739", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hannah need to buy a baking dish and a cookie jar? (Unit: $)", + "choices": null, + "answer": "23", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 201, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "741": { + "question_id": "741", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1080, + "img_width": 1920, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "743": { + "question_id": "743", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the different between the highest unemployment rate and the lowest?", + "choices": null, + "answer": "10.53", + "extraction": "1.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "745": { + "question_id": "745", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2832, + "img_width": 4256, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "747": { + "question_id": "747", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot M$, $FL=24,HJ=48$, and $m \\widehat {HP}=65$. Find $m \\widehat {HJ}$.\nChoices:\n(A) 65\n(B) 120\n(C) 130\n(D) 155", + "choices": [ + "65", + "120", + "130", + "155" + ], + "answer": "130", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 467, + "img_width": 507, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "749": { + "question_id": "749", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()\nChoices:\n(A) \\frac{15}{4}cm\n(B) \\frac{20}{3}cm\n(C) \\frac{15}{7}cm\n(D) \\frac{20}{7}cm", + "choices": [ + "\\frac{15}{4}cm", + "\\frac{20}{3}cm", + "\\frac{15}{7}cm", + "\\frac{20}{7}cm" + ], + "answer": "\\frac{20}{7}cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{15}{4}cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 181, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "751": { + "question_id": "751", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely happen if Artemia was removed?\nChoices:\n(A) Seahorses would decrease\n(B) Rotifers would decrease\n(C) Mysids would decrease\n(D) Algae would decrease", + "choices": [ + "Seahorses would decrease", + "Rotifers would decrease", + "Mysids would decrease", + "Algae would decrease" + ], + "answer": "Seahorses would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Seahorses would decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 363, + "img_width": 862, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "753": { + "question_id": "753", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "755": { + "question_id": "755", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a polynomial", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "757": { + "question_id": "757", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 7.2\n(B) 8\n(C) 12\n(D) 15", + "choices": [ + "7.2", + "8", + "12", + "15" + ], + "answer": "7.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 220, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "759": { + "question_id": "759", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 201, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "761": { + "question_id": "761", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the crayfish population if the Largemouth Bass and Northern Pike populations decrease?\nChoices:\n(A) Nothing\n(B) Decrease\n(C) Slightly Decrease\n(D) Increase", + "choices": [ + "Nothing", + "Decrease", + "Slightly Decrease", + "Increase" + ], + "answer": "Increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Nothing", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 319, + "img_width": 405, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "763": { + "question_id": "763", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny shiny balls. Subtract all purple objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "765": { + "question_id": "765", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "767": { + "question_id": "767", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of y?", + "choices": null, + "answer": "5", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 429, + "img_width": 483, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "769": { + "question_id": "769", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each blue ball represents one particle of solute. Which solution has a higher concentration of blue particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B", + "choices": [ + "neither; their concentrations are the same", + "Solution A", + "Solution B" + ], + "answer": "Solution A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "771": { + "question_id": "771", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram of a food chain below and on your knowledge of science. If the population of snakes increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) None", + "choices": [ + "decrease", + "remain the same", + "increase", + "None" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "773": { + "question_id": "773", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point D is on the extended line of AB, passing point D is the tangent of \u2299O, and the tangent point is C, if \u2220A = 25.0, then \u2220D = ()\nChoices:\n(A) 25\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 65\u00b0", + "choices": [ + "25\u00b0", + "40\u00b0", + "50\u00b0", + "65\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 163, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "775": { + "question_id": "775", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 724, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "777": { + "question_id": "777", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In rhombus LMPQ, $m \\angle Q L M=2 x^{2}-10$, $m \\angle Q P M=8 x$, and $M P=10$ . \r\nFind the perimeter of $LMPQ$\nChoices:\n(A) 10\n(B) 40\n(C) 70\n(D) 140", + "choices": [ + "10", + "40", + "70", + "140" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 177, + "img_width": 337, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "779": { + "question_id": "779", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the cardiac silhouette less than half the diameter of the diaphragm?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 841, + "img_width": 1023, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "781": { + "question_id": "781", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\triangle CDF$, $K$ is the centroid and $DK=16$. Find $CD$.\nChoices:\n(A) 9\n(B) 12\n(C) 18\n(D) 18", + "choices": [ + "9", + "12", + "18", + "18" + ], + "answer": "18", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 540, + "img_width": 461, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "783": { + "question_id": "783", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In order to measure the width of parallel river AB, \u2220ACB = 30.0, \u2220ADB = 60.0, CD = 60.0, then the width of the river AB is ()\nChoices:\n(A) 30m\n(B) 30\u221a{3}m\n(C) (30\u221a{3}+30)m\n(D) (30\u221a{3}-30)m", + "choices": [ + "30m", + "30\u221a{3}m", + "(30\u221a{3}+30)m", + "(30\u221a{3}-30)m" + ], + "answer": "30\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "785": { + "question_id": "785", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Part of an ecosystem is shown in this diagram. Imagine the algae and floating plants are prevented from growing. How will that most likely affect this ecosystem?\nChoices:\n(A) The number of ducks will increase\n(B) The number of minnows will increase\n(C) There will be no effect on this ecosystem\n(D) The number of aquatic crustaceans will decrease", + "choices": [ + "The number of ducks will increase", + "The number of minnows will increase", + "There will be no effect on this ecosystem", + "The number of aquatic crustaceans will decrease" + ], + "answer": "The number of aquatic crustaceans will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The number of ducks will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "787": { + "question_id": "787", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of the zebra's stripes are horizontal?", + "choices": null, + "answer": "50", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "789": { + "question_id": "789", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the values of posse and mortar?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "791": { + "question_id": "791", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Given $V_s$ = 5V, $R_1$ = 1k\u03a9, $R_2$ = 2.2k\u03a9, $R_3$ = 2.2k\u03a9, $R_4$ = 1.5k\u03a9, and $R_L$ = 4.7k\u03a9. Determine the voltage and current across $R_L$. Answer in unit of V (3 sig.fig.).", + "choices": null, + "answer": "1.06", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 400, + "img_width": 444, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "793": { + "question_id": "793", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest Elo score for the agent using an offline RL algorithm?", + "choices": null, + "answer": "1578", + "extraction": "178", + "prediction": "178", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1922, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "795": { + "question_id": "795", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "75", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 601, + "img_width": 475, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "797": { + "question_id": "797", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing pattern in the picture?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 291, + "img_width": 386, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "799": { + "question_id": "799", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Ruth need to buy a baking dish, a casserole dish, and an ice cream scoop? (Unit: $)", + "choices": null, + "answer": "13", + "extraction": "13", + "prediction": "13", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 229, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "801": { + "question_id": "801", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?'", + "choices": null, + "answer": "10", + "extraction": "9", + "prediction": "9", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 272, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "803": { + "question_id": "803", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "805": { + "question_id": "805", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the donut more than half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 434, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "807": { + "question_id": "807", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaf shapes would have the least amount of wind resistance and water loss?\nChoices:\n(A) Truncate\n(B) Acuminate\n(C) Rounded\n(D) Sagittate", + "choices": [ + "Truncate", + "Acuminate", + "Rounded", + "Sagittate" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Truncate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 508, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "809": { + "question_id": "809", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a group of horses, some individuals have a black coat and others have a reddish-brown coat. In this group, the gene for the coat color trait has two alleles. The allele for a black coat (L) is dominant over the allele for a reddish-brown coat (l).\nThis Punnett square shows a cross between two horses. What is the expected ratio of offspring with a reddish-brown coat to offspring with a black coat? Choose the most likely ratio.\nChoices:\n(A) 1:3\n(B) 4:0\n(C) 3:1\n(D) 0:4\n(E) 2:2", + "choices": [ + "1:3", + "4:0", + "3:1", + "0:4", + "2:2" + ], + "answer": "2:2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1:3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 241, + "img_width": 233, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "811": { + "question_id": "811", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A machine at the candy factory dispensed different numbers of lemon-flavored candies into various bags. What is the smallest number of lemon-flavored candies? (Unit: lemon-flavored candies)", + "choices": null, + "answer": "34", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "813": { + "question_id": "813", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value on the X axis?", + "choices": null, + "answer": "30", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2264, + "img_width": 1768, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "815": { + "question_id": "815", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle N C L$\nChoices:\n(A) 60\n(B) 120\n(C) 240\n(D) 360", + "choices": [ + "60", + "120", + "240", + "360" + ], + "answer": "120", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 279, + "img_width": 367, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "817": { + "question_id": "817", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight line a \u2225 b, the point B is on the straight line b, and AB \u22a5 BC, \u22202 = 65.0, then the degree of \u22201 is ()\nChoices:\n(A) 65\u00b0\n(B) 25\u00b0\n(C) 35\u00b0\n(D) 45\u00b0", + "choices": [ + "65\u00b0", + "25\u00b0", + "35\u00b0", + "45\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 171, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "819": { + "question_id": "819", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of $t$ in the parallelogram.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 13", + "choices": [ + "6", + "7", + "8", + "13" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 400, + "img_width": 428, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "821": { + "question_id": "821", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most of the people young men?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 360, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "823": { + "question_id": "823", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You can see how organisms are interconnected from the diagram given. What will be the effect if all the Killer whales are removed?\nChoices:\n(A) The population of tuna will increase\n(B) Mouse will decrease in number\n(C) The phytoplankton will decrease\n(D) The grasshopper will die", + "choices": [ + "The population of tuna will increase", + "Mouse will decrease in number", + "The phytoplankton will decrease", + "The grasshopper will die" + ], + "answer": "The population of tuna will increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of tuna will increase", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "825": { + "question_id": "825", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metallic road bikes that are behind the large bus less than the number of small matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "827": { + "question_id": "827", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1138, + "img_width": 828, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "829": { + "question_id": "829", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which matchstick needs to be moved in order to create a square?\nChoices:\n(A) Top\n(B) Bottom\n(C) Left\n(D) Right\n(E) Not possible", + "choices": [ + "Top", + "Bottom", + "Left", + "Right", + "Not possible" + ], + "answer": "Left", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 396, + "img_width": 378, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "831": { + "question_id": "831", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An author recorded how many words she wrote in the past 3 days. How many words in total did the author write on Thursday and Friday? (Unit: words)", + "choices": null, + "answer": "679", + "extraction": "635", + "prediction": "635", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 236, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "833": { + "question_id": "833", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Phenylalanine (Phe, 5) is a naturally occurring amino acid. What is the energy of interaction between its phenyl group and the electric dipole moment of a neighbouring peptide group? Take the distance between the groups as $4.0 \\mathrm{~nm}$ and treat the phenyl group as a benzene molecule. The magnitude of the dipole moment of the peptide group is $\\mu=1.3 \\mathrm{D}$ and the polarizability volume of benzene is $\\alpha^{\\prime}=1.04 \\times 10^{-29} \\mathrm{~m}^3$.", + "choices": null, + "answer": "-4.3", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 372, + "img_width": 474, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "835": { + "question_id": "835", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of people are wearing blue?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "837": { + "question_id": "837", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red motorbikes than big red choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "839": { + "question_id": "839", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many years have value less than 10%?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "841": { + "question_id": "841", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends compared the sizes of their stuffed animal collections. What is the median of the numbers?'", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 265, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "843": { + "question_id": "843", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Red?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 752, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "845": { + "question_id": "845", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 390, + "img_width": 550, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "847": { + "question_id": "847", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which function grows the fastest as x increases?\nChoices:\n(A) red\n(B) purple\n(C) blue", + "choices": [ + "red", + "purple", + "blue" + ], + "answer": "red", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "red", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1294, + "img_width": 1706, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "849": { + "question_id": "849", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The 4 8x8 images shown below are encoded with JPEG coding. Based on their expected DCT (Discrete Cosine Transform) coefficients, Which image has the most non-zero AC coefficients? (a): Image A, (b): Image B, (c): Image C, (d): Image D.\nChoices:\n(A) (c)\n(B) (d)\n(C) (a)\n(D) (b)\n(E) (e)", + "choices": [ + "(c)", + "(d)", + "(a)", + "(b)", + "(e)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 282, + "img_width": 940, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "851": { + "question_id": "851", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the net concessional disbursements from imf greater than 32000000 US$?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1139, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "853": { + "question_id": "853", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, \u2220BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12", + "choices": [ + "3", + "6", + "9", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "855": { + "question_id": "855", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$ so that $a \u2225 b$.\nChoices:\n(A) 2.5\n(B) 14\n(C) 15\n(D) 16", + "choices": [ + "2.5", + "14", + "15", + "16" + ], + "answer": "14", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 536, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "857": { + "question_id": "857", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "859": { + "question_id": "859", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "27", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 603, + "img_width": 750, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "861": { + "question_id": "861", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 680, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "863": { + "question_id": "863", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rhode Island have the lowest value in the USA ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "865": { + "question_id": "865", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Hot Pink have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 512, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "867": { + "question_id": "867", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A food industry researcher compiled the revenues of several pizzerias. How much did Dan's Deep Dish make from pizza sales? (Unit: $)", + "choices": null, + "answer": "22", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 465, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "869": { + "question_id": "869", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow matte cubes. Subtract all metal things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "871": { + "question_id": "871", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 200, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "873": { + "question_id": "873", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 40?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "875": { + "question_id": "875", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow things. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "877": { + "question_id": "877", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms squad and warm?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "879": { + "question_id": "879", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray rubber things. Subtract all small blue spheres. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "881": { + "question_id": "881", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the population of grasshopper decreases, the population of mouse will most likely do what?\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remain the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "883": { + "question_id": "883", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "15", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 207, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "885": { + "question_id": "885", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Grayson counted the number of pieces of pepperoni on each pizza he made. What is the smallest number of pieces of pepperoni? (Unit: pieces of pepperoni)", + "choices": null, + "answer": "18", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "887": { + "question_id": "887", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O. If \u2220ABC = 70.0, then the degree of \u2220AOC is equal to ()\nChoices:\n(A) 140\u00b0\n(B) 130\u00b0\n(C) 120\u00b0\n(D) 110\u00b0", + "choices": [ + "140\u00b0", + "130\u00b0", + "120\u00b0", + "110\u00b0" + ], + "answer": "140\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "140\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 106, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "889": { + "question_id": "889", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 472, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "891": { + "question_id": "891", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "893": { + "question_id": "893", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the blue function as x approaches negative infinity?", + "choices": null, + "answer": "0", + "extraction": "-4", + "prediction": "-4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "895": { + "question_id": "895", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the lowest Audio-Audio Similarity and Text-Audio Similarity scores overall?\nChoices:\n(A) MusicLDM (mix-up)\n(B) MusicLDM (original)\n(C) MusicLDM (BLM)\n(D) MusicLDM (BAM)\n(E) MuBERT", + "choices": [ + "MusicLDM (mix-up)", + "MusicLDM (original)", + "MusicLDM (BLM)", + "MusicLDM (BAM)", + "MuBERT" + ], + "answer": "MuBERT", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "MusicLDM (mix-up)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "violin plot", + "grade": "college", + "img_height": 682, + "img_width": 1882, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "897": { + "question_id": "897", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a calculator to find the measure of $\u2220J$ to the nearest degree.\nChoices:\n(A) 33\n(B) 40\n(C) 50\n(D) 57", + "choices": [ + "33", + "40", + "50", + "57" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 223, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "899": { + "question_id": "899", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number comes next?", + "choices": null, + "answer": "2123", + "extraction": "1357", + "prediction": "1357", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 185, + "img_width": 406, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "901": { + "question_id": "901", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all shiny spheres. Subtract all big red matte spheres. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "903": { + "question_id": "903", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if \u2220ABC = 30.0, then the degree of \u2220AOC is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "905": { + "question_id": "905", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large red cars behind the metal car less than the number of blue matte tandem bikes that are behind the big blue rubber utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "907": { + "question_id": "907", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When the military expenditure value was lower than 0.2%?", + "choices": null, + "answer": "1970", + "extraction": "1970", + "prediction": "1970", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "909": { + "question_id": "909", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AD = 1.0, DB = 2.0, then the value of \\frac ADAB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{1}{4}\n(C) \\frac{1}{3}\n(D) \\frac{1}{2}", + "choices": [ + "\\frac{2}{3}", + "\\frac{1}{4}", + "\\frac{1}{3}", + "\\frac{1}{2}" + ], + "answer": "\\frac{1}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 132, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "911": { + "question_id": "911", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the smaller picture below the larger picture?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "913": { + "question_id": "913", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 763, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "915": { + "question_id": "915", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the Lion population if the Gum Tree population decreased?\nChoices:\n(A) Unable to determine.\n(B) Nothing would happen.\n(C) It would also decrease.\n(D) It would increase.", + "choices": [ + "Unable to determine.", + "Nothing would happen.", + "It would also decrease.", + "It would increase." + ], + "answer": "It would also decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Unable to determine.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 740, + "img_width": 528, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "917": { + "question_id": "917", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of the number of procedures to register a business in 2004 to that in 2007?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 939, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "919": { + "question_id": "919", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold more than 3 units in at least one store?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "921": { + "question_id": "921", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 5\n(B) 8.1\n(C) 10.3\n(D) 21.6", + "choices": [ + "5", + "8.1", + "10.3", + "21.6" + ], + "answer": "21.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 170, + "img_width": 226, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "923": { + "question_id": "923", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model achieves the highest score in terms of Rec?\nChoices:\n(A) Transformers Agent (GPT-4)\n(B) LLaMA-Adapter v2-7B\n(C) LLaVA-7B\n(D) Otter-9B \n(E) MM-ReAct-GPT-3.5\n(F) LLaVA-13B (LLaMA-2)\n(G) MM-ReAct-GPT-4", + "choices": [ + "Transformers Agent (GPT-4)", + "LLaMA-Adapter v2-7B", + "LLaVA-7B", + "Otter-9B ", + "MM-ReAct-GPT-3.5", + "LLaVA-13B (LLaMA-2)", + "MM-ReAct-GPT-4" + ], + "answer": "LLaVA-13B (LLaMA-2)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Transformers Agent (GPT-4)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1910, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "925": { + "question_id": "925", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Haley went to the store. She bought 3+9/10 pounds of pumpernickel bread crumbs. How much did she spend? (Unit: $)", + "choices": null, + "answer": "19.5", + "extraction": "15.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 130, + "img_width": 334, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "927": { + "question_id": "927", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAB\u7684\u5782\u76f4\u5e73\u5206\u7ebf\u4ea4AB\u4e8e\u70b9D\uff0c\u4ea4BC\u4e8e\u70b9E\uff0c\u8fde\u63a5AE\uff0e\u82e5AB\uff1d6\uff0c\u25b3ACE\u7684\u5468\u957f\u4e3a13\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 19\n(B) 16\n(C) 29\n(D) 18", + "choices": [ + "19", + "16", + "29", + "18" + ], + "answer": "19", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "19", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 152, + "img_width": 199, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "929": { + "question_id": "929", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Tim need to buy a mystery game and a toy rocket? (Unit: $)", + "choices": null, + "answer": "85", + "extraction": "32", + "prediction": "32", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 226, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "931": { + "question_id": "931", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O, AB is the diameter of \u2299O, point D is a point on \u2299O, if \u2220ACD = 40.0, then the size of \u2220BAD is ()\nChoices:\n(A) 35\u00b0\n(B) 50\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "35\u00b0", + "50\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "933": { + "question_id": "933", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hector need to buy a European vacation package and an Australian vacation package? (Unit: $)", + "choices": null, + "answer": "9606", + "extraction": "1796", + "prediction": "1796", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 344, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "935": { + "question_id": "935", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAD\uff1d6\uff0cAB\uff1d4\uff0cDE\u5e73\u5206\u2220ADC\u4ea4BC\u4e8e\u70b9E\uff0c\u5219BE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 140, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "937": { + "question_id": "937", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "939": { + "question_id": "939", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be most affected if the clams all died?\nChoices:\n(A) squid\n(B) lantern fish\n(C) octopus\n(D) sea horse", + "choices": [ + "squid", + "lantern fish", + "octopus", + "sea horse" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "squid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "941": { + "question_id": "941", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which is the next number in the series?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 327, + "img_width": 271, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "943": { + "question_id": "943", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1258, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "945": { + "question_id": "945", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 451, + "img_width": 610, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "947": { + "question_id": "947", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBC\u2225DE\uff0c\u2220A\uff1d45\u00b0\uff0c\u2220C\uff1d110\u00b0\uff0c\u5219\u2220AED\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 95\u00b0\n(B) 105\u00b0\n(C) 115\u00b0\n(D) 125\u00b0", + "choices": [ + "95\u00b0", + "105\u00b0", + "115\u00b0", + "125\u00b0" + ], + "answer": "115\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "95\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "949": { + "question_id": "949", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the combined percentage of Lowest ROI and Medium ROI in SEO?", + "choices": null, + "answer": "56", + "extraction": "35", + "prediction": "35", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "951": { + "question_id": "951", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 10.25\n(B) 12.75\n(C) 18.75\n(D) 25.5", + "choices": [ + "10.25", + "12.75", + "18.75", + "25.5" + ], + "answer": "12.75", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10.25", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 427, + "img_width": 487, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "953": { + "question_id": "953", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of trees have leaves?", + "choices": null, + "answer": "50", + "extraction": "50", + "prediction": "50", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "955": { + "question_id": "955", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0e\u70b9O\u662f\u6b63\u4e94\u8fb9\u5f62ABCDE\u7684\u4e2d\u5fc3\uff0c\u2299O\u662f\u6b63\u4e94\u8fb9\u5f62\u7684\u5916\u63a5\u5706\uff0c\u2220ADE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 32\u00b0\n(C) 36\u00b0\n(D) 40\u00b0", + "choices": [ + "30\u00b0", + "32\u00b0", + "36\u00b0", + "40\u00b0" + ], + "answer": "36\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 136, + "img_width": 136, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "957": { + "question_id": "957", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big brown buss behind the gray matte aeroplane greater than the number of yellow shiny scooters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "959": { + "question_id": "959", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The teachers at an elementary school counted how many desks they had in their classrooms. What is the median of the numbers?'", + "choices": null, + "answer": "32", + "extraction": "32", + "prediction": "32", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "961": { + "question_id": "961", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value in blue bar?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "963": { + "question_id": "963", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For what x does f reach its local maximum?", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 397, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "965": { + "question_id": "965", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: whats the lowest number yard line that you can see?", + "choices": null, + "answer": "30", + "extraction": "30", + "prediction": "30", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 690, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "967": { + "question_id": "967", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the amount earned from national visitors greater than the average amount earned from national visitors taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1146, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "969": { + "question_id": "969", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "971": { + "question_id": "971", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the boy reach the highest book?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "973": { + "question_id": "973", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many zeros does this function have?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "975": { + "question_id": "975", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte objects. Subtract all blue metallic objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "977": { + "question_id": "977", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5AB\u2225CD\uff0cAF\u4e0eCD\u4ea4\u4e8e\u70b9E\uff0cBE\u22a5AF\uff0c\u2220B\uff1d65\u00b0\uff0c\u5219\u2220DEF\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 5\u00b0\n(C) 15\u00b0\n(D) 25\u00b0", + "choices": [ + "65\u00b0", + "5\u00b0", + "15\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "979": { + "question_id": "979", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9079", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "981": { + "question_id": "981", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of 2002, 2003 and 2004?", + "choices": null, + "answer": "70.4", + "extraction": "6006.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "983": { + "question_id": "983", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest accuracy reported in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "985": { + "question_id": "985", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the smallest percentage value recorded in the chart?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "987": { + "question_id": "987", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A cross-section of an airplane wing is shown. Measurements of the thickness of the wing, in centimeters, at 20-centimeter intervals are 5.8, 20.3, 26.7, 29.0, 27.6, 27.3, 23.8, 20.5, 15.1, 8.7, and 2.8. Use the Midpoint Rule to estimate the area of the wing's cross-section.", + "choices": null, + "answer": "4232", + "extraction": "200", + "prediction": "200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 338, + "img_width": 946, + "language": "english", + "skills": [ + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "989": { + "question_id": "989", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "991": { + "question_id": "991", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which functions have no zeros?\nChoices:\n(A) green and red\n(B) blue and orange\n(C) purple and grey", + "choices": [ + "green and red", + "blue and orange", + "purple and grey" + ], + "answer": "purple and grey", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green and red", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 383, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "993": { + "question_id": "993", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny yellow matte cylinders. Subtract all small blue rubber spheres. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "995": { + "question_id": "995", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the sum of the two bars in 18 to 21?", + "choices": null, + "answer": "18.27", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "997": { + "question_id": "997", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is kx^2/2 larger than E at x=0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 197, + "img_width": 537, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "999": { + "question_id": "999", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the tangential components of $\\mathbf{a}$.", + "choices": null, + "answer": "4.5", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "2": { + "question_id": "2", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the total volume of the measuring cup? (Unit: g)", + "choices": null, + "answer": "1000", + "extraction": "1000", + "prediction": "1000", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "4": { + "question_id": "4", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large rubber spheres. Subtract all big shiny cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "6": { + "question_id": "6", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()\nChoices:\n(A) 6cm\n(B) 7cm\n(C) 4cm\n(D) 5cm", + "choices": [ + "6cm", + "7cm", + "4cm", + "5cm" + ], + "answer": "6cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 30, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "8": { + "question_id": "8", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny gray bicycles that are on the left side of the brown metal sedan greater than the number of things that are to the left of the tiny green bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "10": { + "question_id": "10", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "E", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 418, + "img_width": 376, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "12": { + "question_id": "12", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer metallic fighters than rubber objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "14": { + "question_id": "14", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "16": { + "question_id": "16", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 5 units in at least one store?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "18": { + "question_id": "18", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The passage below describes an experiment. Read the passage and then follow the instructions below.\n\nLinda applied a thin layer of wax to the underside of her snowboard and rode the board straight down a hill. Then, she removed the wax and rode the snowboard straight down the hill again. She repeated the rides four more times, alternating whether she rode with a thin layer of wax on the board or not. Her friend Bob timed each ride. Linda and Bob calculated the average time it took to slide straight down the hill on the snowboard with wax compared to the average time on the snowboard without wax.\nFigure: snowboarding down a hill. Identify the question that Linda and Bob's experiment can best answer.\nChoices:\n(A) Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?\n(B) Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "choices": [ + "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?" + ], + "answer": "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "elementary school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "20": { + "question_id": "20", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "22": { + "question_id": "22", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 785, + "img_width": 555, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "24": { + "question_id": "24", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 709, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "26": { + "question_id": "26", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black greater than Deep Sky Blue?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 761, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "28": { + "question_id": "28", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{AB}$ is a diameter, $AC=8$ inches, and $BC=15$ inches. Find the radius of the circle.\nChoices:\n(A) 7.5\n(B) 8\n(C) 8.5\n(D) 17", + "choices": [ + "7.5", + "8", + "8.5", + "17" + ], + "answer": "8.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 431, + "img_width": 519, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "30": { + "question_id": "30", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the two chords AB and CD in the circle intersect at E, \u2220D = 35.0, \u2220AEC = 105.0, then \u2220C = ()\nChoices:\n(A) 60\u00b0\n(B) 70\u00b0\n(C) 80\u00b0\n(D) 85\u00b0", + "choices": [ + "60\u00b0", + "70\u00b0", + "80\u00b0", + "85\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "32": { + "question_id": "32", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1dAC\uff0c\u2220CAB\uff1d40\u00b0\uff0c\u5219\u2220D\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 168, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "34": { + "question_id": "34", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous at each point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "36": { + "question_id": "36", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "38": { + "question_id": "38", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 6?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "40": { + "question_id": "40", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown blocks. Subtract all large blue rubber things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "42": { + "question_id": "42", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 539, + "img_width": 401, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "44": { + "question_id": "44", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend? (Unit: $)", + "choices": null, + "answer": "18", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 226, + "img_width": 305, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "46": { + "question_id": "46", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the population of adult spiders if predator ate all the spider eggs?\nChoices:\n(A) Adult spider population would remain the same\n(B) Adult spider population would double.\n(C) Adults spider population would decrease\n(D) Adult spider population would increase.", + "choices": [ + "Adult spider population would remain the same", + "Adult spider population would double.", + "Adults spider population would decrease", + "Adult spider population would increase." + ], + "answer": "Adults spider population would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Adult spider population would remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 829, + "img_width": 1024, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "48": { + "question_id": "48", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle 3$.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62", + "choices": [ + "28", + "38", + "52", + "62" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 426, + "img_width": 596, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "50": { + "question_id": "50", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what would likely happen if the number of large roach would decrease?\nChoices:\n(A) The population of steelheads would decrease.\n(B) The population of stickleback fry would increase.\n(C) The population of predatory insects would increase.\n(D) The population of predatory insects would decrease.", + "choices": [ + "The population of steelheads would decrease.", + "The population of stickleback fry would increase.", + "The population of predatory insects would increase.", + "The population of predatory insects would decrease." + ], + "answer": "The population of predatory insects would decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of steelheads would decrease.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 600, + "img_width": 633, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "52": { + "question_id": "52", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red metallic spheres. Subtract all big brown matte things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "54": { + "question_id": "54", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, the ratio of the length of line AB to the length of line AC is 2 : 5. If AC = 25, what is the length of line AB?\nChoices:\n(A) 8\n(B) 10\n(C) 15\n(D) 18\n(E) 20", + "choices": [ + "8", + "10", + "15", + "18", + "20" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "56": { + "question_id": "56", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 295, + "img_width": 202, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "58": { + "question_id": "58", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 760, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "60": { + "question_id": "60", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 381, + "img_width": 477, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "62": { + "question_id": "62", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cE\uff0cF\u5206\u522b\u662f\u83f1\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u7684\u4e2d\u70b9\uff0c\u4e14AB\uff1d5\uff0cAC\uff1d6\uff0e\u5219EF\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 6", + "choices": [ + "4", + "5", + "5.5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 138, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "64": { + "question_id": "64", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample A\n(C) sample B", + "choices": [ + "neither; the samples have the same temperature", + "sample A", + "sample B" + ], + "answer": "sample A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 405, + "img_width": 550, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "66": { + "question_id": "66", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "A", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 562, + "img_width": 320, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "68": { + "question_id": "68", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5c06\u4e00\u6839\u957f\u5ea6\u4e3a16cm\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53476cm\u81f3D\u70b9\uff08\u5982\u56fe\uff09\uff0c\u5219\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 8cm", + "choices": [ + "2cm", + "4cm", + "6cm", + "8cm" + ], + "answer": "4cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 252, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "70": { + "question_id": "70", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2600, + "img_width": 2266, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "72": { + "question_id": "72", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A real estate agent drove around the neighborhood and counted the number of houses on each block. How many blocks have exactly 36 houses? (Unit: blocks)", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "74": { + "question_id": "74", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the difference of largest and smallest bar?", + "choices": null, + "answer": "47.6", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "76": { + "question_id": "76", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to fish if pelicans increase?\nChoices:\n(A) decrease\n(B) nothing\n(C) increase\n(D) none of the above", + "choices": [ + "decrease", + "nothing", + "increase", + "none of the above" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "78": { + "question_id": "78", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the missing value.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 394, + "img_width": 1062, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "80": { + "question_id": "80", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what will happen if all the algae died due to pesticides?\nChoices:\n(A) Crabs and limpets will decrease\n(B) Dolphins will increase\n(C) Sea gulls will become extinct\n(D) Star fish will increase", + "choices": [ + "Crabs and limpets will decrease", + "Dolphins will increase", + "Sea gulls will become extinct", + "Star fish will increase" + ], + "answer": "Crabs and limpets will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Crabs and limpets will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 199, + "img_width": 372, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "82": { + "question_id": "82", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A square is inscribed in a circle of area 18$\\pi$ square units. Find the length of a side of the square.\nChoices:\n(A) 3\n(B) 3 \\sqrt 2\n(C) 6\n(D) 6 \\sqrt 2", + "choices": [ + "3", + "3 \\sqrt 2", + "6", + "6 \\sqrt 2" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 202, + "img_width": 200, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "84": { + "question_id": "84", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: ABCD is a square. Inscribed Circle center is O. Find the the angle of \u2220AMK. Return the numeric value.", + "choices": null, + "answer": "130.9", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1220, + "img_width": 1194, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "86": { + "question_id": "86", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the highest Acc score when Pretrain Loss is equal to 1.80?\nChoices:\n(A) ICL\n(B) SFT\n(C) SFT 1/8\n(D) RFT k=100\n(E) RFT k=25\n(F) RET k=6\n(G) RFT U13B", + "choices": [ + "ICL", + "SFT", + "SFT 1/8", + "RFT k=100", + "RFT k=25", + "RET k=6", + "RFT U13B" + ], + "answer": "RFT U13B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ICL", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 1046, + "img_width": 1734, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "88": { + "question_id": "88", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A square is tangent to a line at point P in the figure above. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 277, + "img_width": 442, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "90": { + "question_id": "90", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow matte blocks. Subtract all tiny brown cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "92": { + "question_id": "92", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the windows have a geometric shape that most houses have?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "94": { + "question_id": "94", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cD\u4e3a\u25b3ABC\u5185\u4e00\u70b9\uff0cCD\u5e73\u5206\u2220ACB\uff0cBD\u22a5CD\uff0c\u2220A\uff1d\u2220ABD\uff0c\u82e5\u2220DBC\uff1d54\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 44\u00b0\n(C) 27\u00b0\n(D) 54\u00b0", + "choices": [ + "36\u00b0", + "44\u00b0", + "27\u00b0", + "54\u00b0" + ], + "answer": "27\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 74, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "96": { + "question_id": "96", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: How many times Dissatisfied more than satisfied?", + "choices": null, + "answer": "3.9", + "extraction": "3.8", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 328, + "img_width": 186, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "98": { + "question_id": "98", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the value of the square in the figure.", + "choices": null, + "answer": "2", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning", + "algebraic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "100": { + "question_id": "100", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the ruling group?", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "102": { + "question_id": "102", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The shape is made of unit squares. What is the area of the shape?", + "choices": null, + "answer": "6", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 106, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "104": { + "question_id": "104", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?", + "choices": null, + "answer": "0.8", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "106": { + "question_id": "106", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 1?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "108": { + "question_id": "108", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Find out the average of the bottom two countries ??", + "choices": null, + "answer": "51.04", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "110": { + "question_id": "110", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of two lowest bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "112": { + "question_id": "112", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan airliners less than the number of gray shiny utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "114": { + "question_id": "114", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, KL is tangent to $\\odot M$ at K. Find the value of x.\nChoices:\n(A) 6.00\n(B) 9.45\n(C) 18.9\n(D) 37.8", + "choices": [ + "6.00", + "9.45", + "18.9", + "37.8" + ], + "answer": "9.45", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.00", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 273, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "116": { + "question_id": "116", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf has the most veins?\nChoices:\n(A) Acuminate\n(B) Truncate\n(C) Mucronate\n(D) Acute", + "choices": [ + "Acuminate", + "Truncate", + "Mucronate", + "Acute" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Acuminate", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 187, + "img_width": 350, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "118": { + "question_id": "118", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "120": { + "question_id": "120", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 320, + "img_width": 312, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "122": { + "question_id": "122", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow regular buss than small yellow metallic school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "124": { + "question_id": "124", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of leaf arrangement consists of at least three leaves attached to a node.\nChoices:\n(A) Whorled\n(B) Simple\n(C) Opposite\n(D) Alternate", + "choices": [ + "Whorled", + "Simple", + "Opposite", + "Alternate" + ], + "answer": "Whorled", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Whorled", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "126": { + "question_id": "126", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 800, + "img_width": 623, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "128": { + "question_id": "128", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large metal blocks. Subtract all yellow cylinders. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "130": { + "question_id": "130", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1403, + "img_width": 1063, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "132": { + "question_id": "132", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57284\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u5747\u4e3a1\uff0c\u70b9A\uff0cB\uff0cC\u90fd\u5728\u683c\u70b9\u4e0a\uff0cAD\u22a5BC\u4e8eD\uff0c\u5219AD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) \\frac{7}{3}", + "choices": [ + "1", + "1.5", + "2", + "\\frac{7}{3}" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "134": { + "question_id": "134", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People can use the engineering-design process to develop solutions to problems. One step in the process is testing if a potential solution meets the requirements of the design.\nThe passage below describes how the engineering-design process was used to test a solution to a problem. Read the passage. Then answer the question below.\n\nCooper was a landscape architect who was hired to design a new city park. The city council wanted the park to have space for outdoor concerts and to have at least 20% of the park shaded by trees. Cooper thought the concert area should be at least 150 meters from the road so traffic noise didn't interrupt the music. He developed three possible designs for the park with the concert area in a different location in each design. Then, he tested each design by measuring the distance between the road and the concert area.\nFigure: studying an architect's design. Which of the following could Cooper's test show?\nChoices:\n(A) if at least 20% of the park would be shaded by trees in each design\n(B) which design would have the least traffic noise in the concert area\n(C) which design would have the greatest distance between the concert area and the road", + "choices": [ + "if at least 20% of the park would be shaded by trees in each design", + "which design would have the least traffic noise in the concert area", + "which design would have the greatest distance between the concert area and the road" + ], + "answer": "which design would have the greatest distance between the concert area and the road", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "if at least 20% of the park would be shaded by trees in each design", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "high school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "136": { + "question_id": "136", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest value of blue bar?", + "choices": null, + "answer": "64", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 443, + "img_width": 415, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "138": { + "question_id": "138", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sandwich cut in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "140": { + "question_id": "140", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which food has the least carbs?\nChoices:\n(A) soup\n(B) water\n(C) sandwich\n(D) buns", + "choices": [ + "soup", + "water", + "sandwich", + "buns" + ], + "answer": "soup", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "soup", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "142": { + "question_id": "142", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it split in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 425, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "144": { + "question_id": "144", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Natalie buys 4.6 kilograms of turmeric. What is the total cost? (Unit: $)", + "choices": null, + "answer": "13.8", + "extraction": "18.4", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 162, + "img_width": 210, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "146": { + "question_id": "146", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kimberly's classmates revealed how many science articles they read. What is the range of the numbers?'", + "choices": null, + "answer": "4", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 286, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "148": { + "question_id": "148", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the smallest base?\nChoices:\n(A) Hastate\n(B) Cordate\n(C) Sagittate\n(D) Decurrent", + "choices": [ + "Hastate", + "Cordate", + "Sagittate", + "Decurrent" + ], + "answer": "Decurrent", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Hastate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 161, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "150": { + "question_id": "150", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are three points on \u2299O, and the straight line CD and \u2299O are tangent to point C. If \u2220DCB = 40.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 80\u00b0\n(D) 100\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "80\u00b0", + "100\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 144, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "152": { + "question_id": "152", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u5c06\u542b30\u00b0\u89d2\u7684\u76f4\u89d2\u4e09\u89d2\u677f\u6309\u5982\u56fe\u65b9\u5f0f\u653e\u7f6e\uff0c\u76f4\u89d2\u9876\u70b9\u5728l2\u4e0a\uff0c\u82e5\u22201\uff1d76\u00b0\uff0c\u5219\u22202\uff1d\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 45\u00b0\n(C) 44\u00b0\n(D) 64\u00b0", + "choices": [ + "36\u00b0", + "45\u00b0", + "44\u00b0", + "64\u00b0" + ], + "answer": "44\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 208, + "img_width": 229, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "154": { + "question_id": "154", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an odd function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "156": { + "question_id": "156", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the as x approaches 1 from the left side?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "158": { + "question_id": "158", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 685, + "img_width": 911, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "160": { + "question_id": "160", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 270, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "162": { + "question_id": "162", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The bird watcher counted the number of birds in each flock that passed overhead. How many flocks had at least 17 birds but fewer than 33 birds? (Unit: flocks)", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 202, + "img_width": 117, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "164": { + "question_id": "164", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, CE \u22a5 AB, point E is the foot of perpendicular, if \u2220D = 55.0, then \u2220BCE = ()\nChoices:\n(A) 55\u00b0\n(B) 35\u00b0\n(C) 25\u00b0\n(D) 30\u00b0", + "choices": [ + "55\u00b0", + "35\u00b0", + "25\u00b0", + "30\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 161, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "166": { + "question_id": "166", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Shape is missing?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "B", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 816, + "img_width": 2028, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "168": { + "question_id": "168", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the Hue-Saturation subspace shown in Fig. Q2 is a perfect circle and that colors A, B and C can be represented as the 3 points shown in the subspace. Which color has the smallest saturation coefficient?\nChoices:\n(A) (c)\n(B) (a)\n(C) (e)\n(D) (d)\n(E) (b)", + "choices": [ + "(c)", + "(a)", + "(e)", + "(d)", + "(b)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 454, + "img_width": 414, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "170": { + "question_id": "170", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: f(-1) is ____ f(0).\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "172": { + "question_id": "172", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Dark Salmon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 524, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "174": { + "question_id": "174", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny cyan suvs that are behind the aeroplane than cyan utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "176": { + "question_id": "176", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle.\nChoices:\n(A) 0.5\n(B) 1\n(C) 1.5\n(D) 2", + "choices": [ + "0.5", + "1", + "1.5", + "2" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 292, + "img_width": 305, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "178": { + "question_id": "178", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u5728\u2220FBD\u7684\u4e24\u6761\u8fb9BF\u3001BD\u4e0a\uff0cBE\u5e73\u5206\u2220FBD\uff0cCE\u5e73\u5206\u2220ACD\uff0c\u8fde\u63a5AE\uff0c\u82e5\u2220BEC\uff1d35\u00b0\uff0c\u5219\u2220FAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 45\u00b0\n(C) 55\u00b0\n(D) 65\u00b0", + "choices": [ + "35\u00b0", + "45\u00b0", + "55\u00b0", + "65\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 99, + "img_width": 129, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "180": { + "question_id": "180", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny brown cylinders. Subtract all tiny brown objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "182": { + "question_id": "182", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Yellow?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 589, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "184": { + "question_id": "184", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 0?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "186": { + "question_id": "186", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC \u22a5 CD, BD \u22a5 CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()\nChoices:\n(A) \\frac{20}{3}\n(B) \\frac{10}{3}\n(C) 7\n(D) \\frac{14}{3}", + "choices": [ + "\\frac{20}{3}", + "\\frac{10}{3}", + "7", + "\\frac{14}{3}" + ], + "answer": "\\frac{20}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{20}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "188": { + "question_id": "188", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many methods in the table achieve an A-847 score higher than 20.0?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 634, + "img_width": 2226, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "190": { + "question_id": "190", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 132, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "192": { + "question_id": "192", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the diameter CD of \u2299O crosses the midpoint G of chord EF, \u2220DCF = 20.0, then \u2220EOD is equal to ()\nChoices:\n(A) 10\u00b0\n(B) 20\u00b0\n(C) 40\u00b0\n(D) 80\u00b0", + "choices": [ + "10\u00b0", + "20\u00b0", + "40\u00b0", + "80\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 127, + "img_width": 101, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "194": { + "question_id": "194", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: On average, how many people can commute on this vehicle?", + "choices": null, + "answer": "50", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 408, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "196": { + "question_id": "196", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u6240\u793a\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u5df2\u77e5\u70b9D\uff0cE\uff0cF\u5206\u522b\u4e3a\u8fb9BC\uff0cAD\uff0cCE\u7684\u4e2d\u70b9\uff0c\u4e14S\u25b3ABC\uff1d4cm2\uff0c\u5219S\u25b3DEF\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 2cm2\n(B) 1cm2\n(C) 0.5cm2\n(D) 0.25cm2", + "choices": [ + "2cm2", + "1cm2", + "0.5cm2", + "0.25cm2" + ], + "answer": "0.5cm2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "198": { + "question_id": "198", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the missing value.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 756, + "img_width": 890, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "200": { + "question_id": "200", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 404, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "202": { + "question_id": "202", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "204": { + "question_id": "204", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0627\u0632 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u062a\u0635\u0648\u06cc\u0631 \u062f\u0631\u0628 \u062f\u0648\u0645 \u0686\u0646\u062f \u0634\u06cc\u0634\u0647 \u0628\u062f\u0648\u0646 \u0631\u0646\u06af \u062f\u0627\u0631\u0647\u061f", + "choices": null, + "answer": "12", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 376, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "206": { + "question_id": "206", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the scale factor from $Q$ to $Q'$.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 611, + "img_width": 731, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "208": { + "question_id": "208", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 195, + "img_width": 300, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "210": { + "question_id": "210", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 370, + "img_width": 493, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "212": { + "question_id": "212", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 403, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "214": { + "question_id": "214", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of amount earned from merchandise imports in Canada greater than the average percentage of amount earned from merchandise imports in Canada taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1109, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "216": { + "question_id": "216", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people like the most preferred object in the whole chart?", + "choices": null, + "answer": "90", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "218": { + "question_id": "218", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large red rubber blocks. Subtract all tiny red matte objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "220": { + "question_id": "220", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2299O is the circumscribed circle of the quadrilateral ABCD, if \u2220O = 110.0, then the degree of \u2220C is ()\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 105\u00b0\n(D) 90\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "105\u00b0", + "90\u00b0" + ], + "answer": "125\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "222": { + "question_id": "222", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue shiny spheres. Subtract all big blue shiny cubes. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "224": { + "question_id": "224", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "226": { + "question_id": "226", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "228": { + "question_id": "228", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If \u2220ACE = 25.0, then the degree of \u2220D is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 137, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "230": { + "question_id": "230", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy higher than 9 in at least one dataset?", + "choices": null, + "answer": "0", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "232": { + "question_id": "232", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each pink ball represents one particle of solute. Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A", + "choices": [ + "neither; their concentrations are the same", + "Solution B", + "Solution A" + ], + "answer": "Solution B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "234": { + "question_id": "234", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown above, AC = 6. What is the length of segment AB?\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 7\n(E) It cannot be determined from the information given", + "choices": [ + "3", + "5", + "6", + "7", + "It cannot be determined from the information given" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 378, + "img_width": 434, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "236": { + "question_id": "236", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $z$.\nChoices:\n(A) 7\n(B) 9\n(C) 12\n(D) 15", + "choices": [ + "7", + "9", + "12", + "15" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 423, + "img_width": 447, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "238": { + "question_id": "238", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find PT\nChoices:\n(A) 6\n(B) \\frac { 20 } { 3 }\n(C) 7\n(D) 22 / 3", + "choices": [ + "6", + "\\frac { 20 } { 3 }", + "7", + "22 / 3" + ], + "answer": "\\frac { 20 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "240": { + "question_id": "240", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2387, + "img_width": 3500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "242": { + "question_id": "242", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle A$ of quadrilateral ABCD\nChoices:\n(A) 45\n(B) 90\n(C) 135\n(D) 180", + "choices": [ + "45", + "90", + "135", + "180" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 381, + "img_width": 621, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "244": { + "question_id": "244", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 500, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "246": { + "question_id": "246", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. In 1995, the Intergovernmental Panel on Climate Change (IPCC) considered a global average temperature rise of $1.0-3.5^{\\circ} \\mathrm{C}$ likely by the year 2100 , with $2.0^{\\circ} \\mathrm{C}$ its best estimate. Because water vapour is itself a greenhouse gas, the increase in water vapour content of the atmosphere is of some concern to climate change experts. Predict the relative increase in water vapour in the atmosphere based on a temperature rises of $2.0 \\mathrm{~K}$, assuming that the relative humidity remains constant. (The present global mean temperature is $290 \\mathrm{~K}$, and the equilibrium vapour pressure of water at that temperature is 0.0189 bar.)", + "choices": null, + "answer": "13", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 216, + "img_width": 1098, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "248": { + "question_id": "248", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green matte choppers greater than the number of large yellow shiny motorbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "250": { + "question_id": "250", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area $A$ of the shaded region is given. Find $x$. $A = 66$ cm$^2$ .\nChoices:\n(A) 4.6\n(B) 6.5\n(C) 13.0\n(D) 26.0", + "choices": [ + "4.6", + "6.5", + "13.0", + "26.0" + ], + "answer": "13.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 286, + "img_width": 303, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "252": { + "question_id": "252", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Consider the infinitely long chain of resistors shown below. What is the resistance between terminals a and b if R=1?", + "choices": null, + "answer": "0.73", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 169, + "img_width": 463, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "254": { + "question_id": "254", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big objects that are in front of the metal fighter less than the number of things that are behind the big metallic bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "256": { + "question_id": "256", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAD\u5e73\u5206\u2220BAC\uff0cAD\u4ea4BC\u4e8e\u70b9D\uff0cDE\u22a5AB\uff0c\u5782\u8db3\u4e3aE\uff0c\u82e5DE\uff1d3\uff0cAC\uff1d4\uff0c\u5219\u25b3ADC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "258": { + "question_id": "258", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An employee at the craft store counted the number of red buttons in each bag of mixed buttons. How many bags had at least 60 red buttons but fewer than 81 red buttons?'", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 156, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "260": { + "question_id": "260", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the derivative of the function positive between [1, 2] assuming that it's differentiable?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 368, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "262": { + "question_id": "262", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between genres of tv shows watched by highest female and lowest female?", + "choices": null, + "answer": "39", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 756, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "264": { + "question_id": "264", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For Group C, in which week is the cumulative increase in weight , the highest?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "266": { + "question_id": "266", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has the most uneven shape?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) truncate", + "choices": [ + "oblique", + "obtuse", + "cordate", + "truncate" + ], + "answer": "oblique", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "oblique", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 240, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "268": { + "question_id": "268", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Colton wants to buy 1+3/10 kilograms of English muffins. How much will he spend? (Unit: $)", + "choices": null, + "answer": "10.4", + "extraction": "10.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "270": { + "question_id": "270", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A and B are three points on \u2299O and AB = AC. Connect BO and CO, if \u2220ABC = 65.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 50\u00b0\n(B) 65\u00b0\n(C) 100\u00b0\n(D) 130\u00b0", + "choices": [ + "50\u00b0", + "65\u00b0", + "100\u00b0", + "130\u00b0" + ], + "answer": "100\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "272": { + "question_id": "272", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:30\n(B) 1:30\n(C) 4:30\n(D) 5:30\n(E) 11:30", + "choices": [ + "9:30", + "1:30", + "4:30", + "5:30", + "11:30" + ], + "answer": "4:30", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9:30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 261, + "img_width": 261, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "274": { + "question_id": "274", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u3001BC\u3001CD\u3001DA\u90fd\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5df2\u77e5AD\uff1d2\uff0cBC\uff1d5\uff0c\u5219AB+CD\u7684\u503c\u662f\uff08\uff09\nChoices:\n(A) 14\n(B) 12\n(C) 9\n(D) 7", + "choices": [ + "14", + "12", + "9", + "7" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "276": { + "question_id": "276", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that the radius of \u2299O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()\nChoices:\n(A) 1mm\n(B) 2mm\n(C) 3mm\n(D) 4mm", + "choices": [ + "1mm", + "2mm", + "3mm", + "4mm" + ], + "answer": "3mm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1mm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 102, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "278": { + "question_id": "278", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following objects, which one has the best PSNR score?\nChoices:\n(A) Lego\n(B) Mats\n(C) Mic\n(D) Ship", + "choices": [ + "Lego", + "Mats", + "Mic", + "Ship" + ], + "answer": "Mic", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lego", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 940, + "img_width": 1478, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "280": { + "question_id": "280", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, ABCDEF is a regular hexagon, and its center is point O. What is the value of x?\nChoices:\n(A) 80\n(B) 60\n(C) 40\n(D) 30\n(E) 20", + "choices": [ + "80", + "60", + "40", + "30", + "20" + ], + "answer": "60", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "282": { + "question_id": "282", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of the sun is showing?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "284": { + "question_id": "284", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "286": { + "question_id": "286", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5c06\u4e00\u6839\u957f\u5ea6\u4e3a8cm\uff0c\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u76ae\u7b4b\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53473cm\u5230\u70b9D\uff0c\u5219\u6b64\u65f6\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 6cm\n(B) 5cm\n(C) 4cm\n(D) 2cm", + "choices": [ + "6cm", + "5cm", + "4cm", + "2cm" + ], + "answer": "2cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "288": { + "question_id": "288", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following value ranges of \u03bb2 does the percentage of Attack Effectiveness begin to be lower than that of Diversity?\nChoices:\n(A) 0.0 - 0.2\n(B) 0.2 - 0.4\n(C) 0.4 - 0.6\n(D) 0.6 - 0.8\n(E) 0.8 - 1.0", + "choices": [ + "0.0 - 0.2", + "0.2 - 0.4", + "0.4 - 0.6", + "0.6 - 0.8", + "0.8 - 1.0" + ], + "answer": "0.0 - 0.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.0 - 0.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 606, + "img_width": 2144, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "290": { + "question_id": "290", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5e73\u884c\u7ebfAB\uff0cCD\u88ab\u76f4\u7ebfAE\u6240\u622a\uff0e\u82e5\u22201\uff1d105\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 75\u00b0\n(B) 85\u00b0\n(C) 95\u00b0\n(D) 105\u00b0", + "choices": [ + "75\u00b0", + "85\u00b0", + "95\u00b0", + "105\u00b0" + ], + "answer": "75\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 132, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "292": { + "question_id": "292", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rebecca Purple greater than Olive Drab?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 461, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "294": { + "question_id": "294", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: In Fig. 21-25, the particles have charges $q_1=-q_2=100 \\mathrm{nC}$ and $q_3=-q_4=200 \\mathrm{nC}$, and distance $a=$ $5.0 \\mathrm{~cm}$. What is the $x$ component of the net electrostatic force on particle 3?", + "choices": null, + "answer": "0.17", + "extraction": "-0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 293, + "img_width": 247, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "296": { + "question_id": "296", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of f(-3) is ____ the value of f(2)\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "298": { + "question_id": "298", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A decrease in rabbits would affect whose food source?\nChoices:\n(A) mountain lion\n(B) producer\n(C) decomposer\n(D) energy", + "choices": [ + "mountain lion", + "producer", + "decomposer", + "energy" + ], + "answer": "mountain lion", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mountain lion", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 699, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "300": { + "question_id": "300", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{HK}$ and $\\overline{IG}$ are diameters of $\\odot L$. Find $m \\widehat {IHJ}$.\nChoices:\n(A) 59\n(B) 135\n(C) 270\n(D) 301", + "choices": [ + "59", + "135", + "270", + "301" + ], + "answer": "270", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "59", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 492, + "img_width": 510, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "302": { + "question_id": "302", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green curve?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a logarithmic function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 300, + "img_width": 531, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "304": { + "question_id": "304", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In the figure above, two line segments meet at a point on line l. If the value of y is equal to the square of the value of x, what is the value of y?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 431, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "306": { + "question_id": "306", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the bed much larger than the kitten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "308": { + "question_id": "308", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "310": { + "question_id": "310", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z\nChoices:\n(A) 10\n(B) \\frac { 32 } { 3 }\n(C) \\frac { 40 } { 3 }\n(D) \\frac { 50 } { 3 }", + "choices": [ + "10", + "\\frac { 32 } { 3 }", + "\\frac { 40 } { 3 }", + "\\frac { 50 } { 3 }" + ], + "answer": "\\frac { 40 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 218, + "img_width": 350, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "312": { + "question_id": "312", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2003, which crop cost the most per cwt?'\nChoices:\n(A) potatoes\n(B) peas\n(C) apples\n(D) canola", + "choices": [ + "potatoes", + "peas", + "apples", + "canola" + ], + "answer": "apples", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "potatoes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 204, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "314": { + "question_id": "314", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 522, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "316": { + "question_id": "316", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, given that points A, B, and C are on \u2299O, \u2220AOB = 100.0, then the degree of \u2220ACB is ()\nChoices:\n(A) 50\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 200\u00b0", + "choices": [ + "50\u00b0", + "80\u00b0", + "100\u00b0", + "200\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 105, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "318": { + "question_id": "318", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the figure. Round to the nearest tenth if necessary.\nChoices:\n(A) 191.5\n(B) 1128\n(C) 2256\n(D) 4512", + "choices": [ + "191.5", + "1128", + "2256", + "4512" + ], + "answer": "2256", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "191.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 175, + "img_width": 239, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "320": { + "question_id": "320", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0cAB\uff1d13\uff0cAC\uff1d5\uff0cD\u3001E\u5206\u522b\u662fAC\u3001AB\u7684\u4e2d\u70b9\uff0c\u5219DE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 6.5\n(B) 6\n(C) 5.5\n(D) \\frac{\u221a{119}}{2}", + "choices": [ + "6.5", + "6", + "5.5", + "\\frac{\u221a{119}}{2}" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 90, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "322": { + "question_id": "322", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cA\uff0cB\u4e24\u70b9\u88ab\u6c60\u5858\u9694\u5f00\uff0c\u5728AB\u5916\u9009\u4e00\u70b9C\uff0c\u4f7f\u70b9C\u80fd\u76f4\u63a5\u5230\u8fbe\u70b9A\u548c\u70b9B\uff0c\u8fde\u63a5AC\u548cBC\uff0c\u5e76\u5206\u522b\u627e\u51faAC\u548cBC\u7684\u4e2d\u70b9M\uff0cN\uff0e\u5982\u679c\u6d4b\u5f97MN\uff1d20m\uff0c\u90a3\u4e48A\uff0cB\u4e24\u70b9\u7684\u8ddd\u79bb\u662f\uff08\uff09\nChoices:\n(A) 10m\n(B) 20m\n(C) 35m\n(D) 40m", + "choices": [ + "10m", + "20m", + "35m", + "40m" + ], + "answer": "40m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "324": { + "question_id": "324", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between highest and lowest value of dark blue bar?", + "choices": null, + "answer": "53", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 726, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "326": { + "question_id": "326", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 170, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "328": { + "question_id": "328", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm candy for all the datasets?", + "choices": null, + "answer": "18", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "330": { + "question_id": "330", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny cubes. Subtract all brown balls. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "332": { + "question_id": "332", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A taxi cab driver tracked how many miles he drove each month. How many miles did the taxi cab driver drive in total in January and April? (Unit: miles)", + "choices": null, + "answer": "7873", + "extraction": "2000", + "prediction": "2000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 125, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "334": { + "question_id": "334", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metal tandem bikes in front of the small yellow metallic bicycle than metal bicycles on the left side of the large brown jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "336": { + "question_id": "336", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "338": { + "question_id": "338", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC above, AB = AC, E is the midpoint of line AB, and D is the midpoint of line AC. If AE = x and ED = 4, what is length BC?\nChoices:\n(A) 6\n(B) 8\n(C) 2*x\n(D) 4*x\n(E) 4*x^2", + "choices": [ + "6", + "8", + "2*x", + "4*x", + "4*x^2" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 167, + "img_width": 121, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "340": { + "question_id": "340", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following domains has the most number of BPE Tokens?\nChoices:\n(A) Legal \n(B) Code \n(C) Conversational \n(D) Math \n(E) Science\n(F) Books \n(G) News \n(H) Encyclopedic", + "choices": [ + "Legal ", + "Code ", + "Conversational ", + "Math ", + "Science", + "Books ", + "News ", + "Encyclopedic" + ], + "answer": "Science", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Legal ", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1176, + "img_width": 2142, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "342": { + "question_id": "342", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, which of the following is the greatest?\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d\n(E) e", + "choices": [ + "a", + "b", + "c", + "d", + "e" + ], + "answer": "d", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 299, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "344": { + "question_id": "344", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal cars that are left of the tiny matte school bus greater than the number of tiny cyan double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "346": { + "question_id": "346", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the y-intercept of this function?", + "choices": null, + "answer": "1", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 339, + "img_width": 341, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "348": { + "question_id": "348", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the pieces in triangle cuts?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "350": { + "question_id": "350", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 89, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "352": { + "question_id": "352", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people will fit in the smaller vehicle?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "354": { + "question_id": "354", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 90?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "356": { + "question_id": "356", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big motorbikes than rubber choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "358": { + "question_id": "358", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is the same as the unfolded cube?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 517, + "img_width": 326, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "360": { + "question_id": "360", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $\\frac{I J}{X J}=\\frac{HJ}{YJ}, m \\angle W X J=130$\r\nand $m \\angle WZG=20,$ find $m \\angle YIZ$\nChoices:\n(A) 40\n(B) 50\n(C) 65\n(D) 110", + "choices": [ + "40", + "50", + "65", + "110" + ], + "answer": "50", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 370, + "img_width": 721, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "362": { + "question_id": "362", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan cylinders. Subtract all tiny purple rubber objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "364": { + "question_id": "364", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and points C and D are on \u2299O. If \u2220ABD = 50.0, then the degree of \u2220BCD is ()\nChoices:\n(A) 30\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "30\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "366": { + "question_id": "366", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 320, + "img_width": 250, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "368": { + "question_id": "368", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow matte school buss greater than the number of big yellow metal cars?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "370": { + "question_id": "370", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, if the number of ferns decrease, the supply of salmon will most likely?\nChoices:\n(A) decrease\n(B) can't tell\n(C) stay same\n(D) increase", + "choices": [ + "decrease", + "can't tell", + "stay same", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 680, + "img_width": 880, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "372": { + "question_id": "372", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small gray spheres. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "374": { + "question_id": "374", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms calf and ivory?", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "376": { + "question_id": "376", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte cubes. Subtract all tiny gray metal cubes. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "7", + "prediction": "7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "378": { + "question_id": "378", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAD\u662f\u25b3ABC\u7684\u4e2d\u7ebf\uff0cE\u4e3aAD\u7684\u4e2d\u70b9\uff0c\u25b3ABE\u7684\u9762\u79ef\u4e3a2\uff0c\u5219\u25b3ABC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8", + "choices": [ + "5", + "6", + "7", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 118, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "380": { + "question_id": "380", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For how many years that the percentage value over 4?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "382": { + "question_id": "382", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the building through the window at least five stories tall?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 400, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "384": { + "question_id": "384", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 495, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "386": { + "question_id": "386", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 5\n(B) 10\n(C) 10 \\sqrt { 3 }\n(D) 20", + "choices": [ + "5", + "10", + "10 \\sqrt { 3 }", + "20" + ], + "answer": "10 \\sqrt { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 164, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "388": { + "question_id": "388", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Express the ratio of $\\tan M$ as a decimal to the nearest hundredth.\nChoices:\n(A) 0.38\n(B) 0.42\n(C) 0.92\n(D) 2.40", + "choices": [ + "0.38", + "0.42", + "0.92", + "2.40" + ], + "answer": "0.42", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.38", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 209, + "img_width": 342, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "390": { + "question_id": "390", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer jets that are left of the small brown suv than objects right of the big shiny car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "392": { + "question_id": "392", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mr. Huffman, a P.E. teacher, wrote down how much weight each of his students could lift. How many people lifted at least 46 pounds? (Unit: people)", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "394": { + "question_id": "394", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following environments has the least GPU days for training?\nChoices:\n(A) HomeGrid\n(B) Msgr S1\n(C) Msgr S2\n(D) Msgr S3\n(E) VLN\n(F) LangRoom", + "choices": [ + "HomeGrid", + "Msgr S1", + "Msgr S2", + "Msgr S3", + "VLN", + "LangRoom" + ], + "answer": "LangRoom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "HomeGrid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 858, + "img_width": 1854, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "396": { + "question_id": "396", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the algae dies then water flea population will\nChoices:\n(A) remains the same\n(B) decrease\n(C) increase\n(D) NA", + "choices": [ + "remains the same", + "decrease", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 576, + "img_width": 720, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "398": { + "question_id": "398", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 942, + "img_width": 727, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "400": { + "question_id": "400", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which Episode ID does the Retroformer attain its peak Success rate (%)?\nChoices:\n(A) 1.0\n(B) 1.5\n(C) 2.0\n(D) 2.5\n(E) 3.0\n(F) 3.5\n(G) 4.0", + "choices": [ + "1.0", + "1.5", + "2.0", + "2.5", + "3.0", + "3.5", + "4.0" + ], + "answer": "4.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 942, + "img_width": 1196, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "402": { + "question_id": "402", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food chain diagram below, which animal would most directly lack food if Grasshoppers get exterminated?\nChoices:\n(A) Rabbit\n(B) Deer\n(C) Frogs\n(D) Wolf", + "choices": [ + "Rabbit", + "Deer", + "Frogs", + "Wolf" + ], + "answer": "Frogs", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rabbit", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 735, + "img_width": 909, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "404": { + "question_id": "404", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which activity begins at 11.50 A.M.?'\nChoices:\n(A) figure skating practice\n(B) private class\n(C) adult class\n(D) children's class", + "choices": [ + "figure skating practice", + "private class", + "adult class", + "children's class" + ], + "answer": "children's class", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "figure skating practice", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 217, + "img_width": 325, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "406": { + "question_id": "406", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many snowmen are there?", + "choices": null, + "answer": "15", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 183, + "img_width": 714, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "408": { + "question_id": "408", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 6\n(B) 6 \\sqrt { 2 }\n(C) 6 \\sqrt { 3 }\n(D) 6 \\sqrt { 5 }", + "choices": [ + "6", + "6 \\sqrt { 2 }", + "6 \\sqrt { 3 }", + "6 \\sqrt { 5 }" + ], + "answer": "6 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 238, + "img_width": 362, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "410": { + "question_id": "410", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of $\\triangle D E F,$ if $\\triangle D E F \\sim \\triangle C B F,$ perimeter of $\\triangle C B F=27, D F=6,$ and $F C=8$\nChoices:\n(A) 20.25\n(B) 21\n(C) 27\n(D) 36", + "choices": [ + "20.25", + "21", + "27", + "36" + ], + "answer": "20.25", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20.25", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 226, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "412": { + "question_id": "412", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tanner has $35. Does he have enough to buy a black jacket and a pair of shorts?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 192, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "414": { + "question_id": "414", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $ST=8, TR=4$, and $PT=6$, find $QR$.\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 10", + "choices": [ + "6", + "8", + "9", + "10" + ], + "answer": "9", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 386, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "416": { + "question_id": "416", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest volume written on the blender?", + "choices": null, + "answer": "800", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "418": { + "question_id": "418", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the number of grasshoppers decreases, what will the population of spiders most likely do?\nChoices:\n(A) remain the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remain the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "420": { + "question_id": "420", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the lowest value on the Y axis?", + "choices": null, + "answer": "0.0", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 1763, + "img_width": 2256, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "422": { + "question_id": "422", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "424": { + "question_id": "424", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the food half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "426": { + "question_id": "426", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u82e5DE\u662f\u25b3ABC\u7684\u4e2d\u4f4d\u7ebf\uff0c\u25b3ADE\u7684\u5468\u957f\u4e3a1\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "428": { + "question_id": "428", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "28", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 968, + "img_width": 1259, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "430": { + "question_id": "430", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=0 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "432": { + "question_id": "432", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of undernourished male children greater than 0.4 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1085, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "434": { + "question_id": "434", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, side AC of triangle ABC is on line l. What is x in terms of k?\nChoices:\n(A) 60-k\n(B) k\n(C) 60+k\n(D) 120-k\n(E) 120-2*k", + "choices": [ + "60-k", + "k", + "60+k", + "120-k", + "120-2*k" + ], + "answer": "60-k", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60-k", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 157, + "img_width": 215, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "436": { + "question_id": "436", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "438": { + "question_id": "438", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 367, + "img_width": 329, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "440": { + "question_id": "440", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the white plate half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "442": { + "question_id": "442", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "444": { + "question_id": "444", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the two genders?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "446": { + "question_id": "446", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u662f\u25b3ABC\u7684\u5185\u5fc3\uff0c\u8fde\u63a5DB\uff0cDC\uff0c\u8fc7\u70b9D\u4f5cEF\u2225BC\u5206\u522b\u4ea4AB\u3001AC\u4e8e\u70b9E\u3001F\uff0c\u82e5BE+CF\uff1d8\uff0c\u5219EF\u7684\u957f\u5ea6\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 8\n(D) 16", + "choices": [ + "4", + "5", + "8", + "16" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "448": { + "question_id": "448", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year recorded the highest share of Urban secondary schools with access to electricity in India?", + "choices": null, + "answer": "2016", + "extraction": "2015", + "prediction": "2015", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "450": { + "question_id": "450", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the grass died, what would be most affected?\nChoices:\n(A) garter snakes\n(B) hognose snakes\n(C) hawks\n(D) grasshoppers", + "choices": [ + "garter snakes", + "hognose snakes", + "hawks", + "grasshoppers" + ], + "answer": "grasshoppers", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "garter snakes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "452": { + "question_id": "452", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most likely equilibrium population count?\nChoices:\n(A) 40\n(B) 60\n(C) 80\n(D) 100", + "choices": [ + "40", + "60", + "80", + "100" + ], + "answer": "80", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 366, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "454": { + "question_id": "454", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "456": { + "question_id": "456", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "458": { + "question_id": "458", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: If you add the two visible numbers, on the jerseys, what is the total sum?", + "choices": null, + "answer": "3", + "extraction": "23", + "prediction": "23", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "460": { + "question_id": "460", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were fewer leaves in this ecosystem, the first organism to experience change as a result would be:\nChoices:\n(A) Frogs\n(B) Crickets\n(C) Snakes\n(D) Hawks", + "choices": [ + "Frogs", + "Crickets", + "Snakes", + "Hawks" + ], + "answer": "Crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Frogs", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "462": { + "question_id": "462", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 100?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "464": { + "question_id": "464", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1316, + "img_width": 1000, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "466": { + "question_id": "466", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Periwinkle intersect Yellow Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 487, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "468": { + "question_id": "468", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "470": { + "question_id": "470", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following models has the lowest KS Rollout Loss overall?\nChoices:\n(A) Baseline\n(B) Diffusion\n(C) PDE-Refiner\n(D) Pushforward", + "choices": [ + "Baseline", + "Diffusion", + "PDE-Refiner", + "Pushforward" + ], + "answer": "PDE-Refiner", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Baseline", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 854, + "img_width": 1422, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "472": { + "question_id": "472", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "474": { + "question_id": "474", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "476": { + "question_id": "476", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If frogs were removed from this environment what animal would potentially see an increase in its population?\nChoices:\n(A) crickets\n(B) deer\n(C) snakes\n(D) hawks", + "choices": [ + "crickets", + "deer", + "snakes", + "hawks" + ], + "answer": "crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "crickets", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 518, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "478": { + "question_id": "478", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 5", + "choices": [ + "10", + "8", + "6", + "5" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "480": { + "question_id": "480", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny blue metal bicycles behind the small sedan less than the number of purple fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "482": { + "question_id": "482", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, triangle ABC is inscribed in the circle with center O and diameter AC. If AB = AO, what is the degree measure of angle ABO?\nChoices:\n(A) 15*\\degree\n(B) 30*\\degree\n(C) 45*\\degree\n(D) 60*\\degree\n(E) 90*\\degree", + "choices": [ + "15*\\degree", + "30*\\degree", + "45*\\degree", + "60*\\degree", + "90*\\degree" + ], + "answer": "60*\\degree", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15*\\degree", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 134, + "img_width": 143, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "484": { + "question_id": "484", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "486": { + "question_id": "486", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1d5\uff0cAD\uff1d7\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 12\n(B) 14\n(C) 35\n(D) 24", + "choices": [ + "12", + "14", + "35", + "24" + ], + "answer": "24", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "488": { + "question_id": "488", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown things. Subtract all tiny blue metallic objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "490": { + "question_id": "490", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u3001B\u5728\u540c\u4e00\u76f4\u7ebf\u4e0a\uff0cDC\u22a5EC\uff0c\u82e5\u2220BCD\uff1d40\u00b0\uff0c\u5219\u2220ACE\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 88, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "492": { + "question_id": "492", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the \u2299O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. Connect AD, given that \u2220DAC = 30.0, the length of the line segment CD is ()\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "1", + "\u221a{3}", + "2", + "2\u221a{3}" + ], + "answer": "2\u221a{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 158, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "494": { + "question_id": "494", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "496": { + "question_id": "496", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "498": { + "question_id": "498", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the water half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 478, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "500": { + "question_id": "500", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1236, + "img_width": 987, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "502": { + "question_id": "502", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tandem bikes that are behind the brown metal bicycle than matte trucks on the left side of the green object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "504": { + "question_id": "504", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, D and E are the points on the edges AB and AC of \u25b3ABC, DE \u2225 BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4", + "choices": [ + "10", + "8", + "6", + "4" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "506": { + "question_id": "506", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?", + "choices": null, + "answer": "[2014, 2016]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "508": { + "question_id": "508", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The owner of a bed and breakfast inn recalled how many guests the inn had hosted each day. What is the median of the numbers?'", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "510": { + "question_id": "510", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{5}", + "choices": [ + "\\frac{2}{3}", + "\\frac{3}{5}", + "\\frac{3}{4}", + "\\frac{4}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 186, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "512": { + "question_id": "512", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the y coordinate of the center of mass of the isosceles right triangle of uniform areal density shown in Figure 9-C?", + "choices": null, + "answer": "0.24", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 356, + "img_width": 497, + "language": "english", + "skills": [ + "geometry reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "514": { + "question_id": "514", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you wanted the leaf with the least main veins, which would you choose?\nChoices:\n(A) 3 main veins\n(B) pinnate\n(C) reticulate\n(D) palmate", + "choices": [ + "3 main veins", + "pinnate", + "reticulate", + "palmate" + ], + "answer": "3 main veins", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3 main veins", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 236, + "img_width": 559, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "516": { + "question_id": "516", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most the stepping stones square?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 339, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "518": { + "question_id": "518", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2211, + "img_width": 2838, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "520": { + "question_id": "520", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 741, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "522": { + "question_id": "522", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 86, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "524": { + "question_id": "524", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The Kingwood Ski Resort asked its guests how many times they went sledding last winter. How many guests went sledding more than 2 times?'", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 163, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "526": { + "question_id": "526", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn", + "choices": [ + "slide", + "flip", + "turn" + ], + "answer": "slide", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "slide", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 104, + "img_width": 253, + "language": "english", + "skills": [ + "geometry reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "528": { + "question_id": "528", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBD\u22a5CF\uff0c\u5782\u8db3\u4e3aB\uff0c\u2220ABF\uff1d35\u00b0\uff0c\u5219\u2220BDC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 45\u00b0\n(D) 55\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "45\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 135, + "img_width": 194, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "530": { + "question_id": "530", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The advertising agency counted the number of billboards in each city in the state. How many cities have fewer than 70 billboards? (Unit: cities)", + "choices": null, + "answer": "9", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 140, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "532": { + "question_id": "532", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer gray trucks that are in front of the large aeroplane than big yellow metal objects in front of the purple object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "534": { + "question_id": "534", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of stunted female children greater than the average percentage of stunted female children taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 883, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "536": { + "question_id": "536", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are on \u2299O, if \u2220C = 35.0, then \u2220AOB = ()\nChoices:\n(A) 17.5\u00b0\n(B) 35\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "17.5\u00b0", + "35\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "17.5\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "538": { + "question_id": "538", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. If AB = 6.0, the area of \u200b\u200bthe ring is ()\nChoices:\n(A) 9\u03c0\n(B) 6\u03c0\n(C) 3\u03c0\n(D) \u03c0", + "choices": [ + "9\u03c0", + "6\u03c0", + "3\u03c0", + "\u03c0" + ], + "answer": "9\u03c0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9\u03c0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "540": { + "question_id": "540", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5", + "choices": [ + "3/11", + "8/11", + "6/11", + "3/5" + ], + "answer": "3/11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3/11", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 103, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "542": { + "question_id": "542", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the figure achieve an Acc score greater than 60?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scatter plot", + "grade": "college", + "img_height": 1358, + "img_width": 1690, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "544": { + "question_id": "544", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total percentage of people who say that they do either less or more often than the usual amount of exercise during the coronavirus pandemic in the United States as of April 2020?", + "choices": null, + "answer": "44", + "extraction": "77", + "prediction": "77", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "546": { + "question_id": "546", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the overall ratio of male to female?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "548": { + "question_id": "548", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer cyan jets than big buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "550": { + "question_id": "550", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with highest accuracy?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "552": { + "question_id": "552", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many queries have a p-value lower than 0.50?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 330, + "img_width": 1726, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "554": { + "question_id": "554", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 488, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "556": { + "question_id": "556", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large red metallic things that are on the left side of the cyan shiny scooter than things that are in front of the small jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "558": { + "question_id": "558", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "560": { + "question_id": "560", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "562": { + "question_id": "562", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green cubes. Subtract all large cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "564": { + "question_id": "564", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest time required to import ?", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1056, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "566": { + "question_id": "566", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u25b3ABC\u224c\u25b3DEF\uff0cCD\u5e73\u5206\u2220BCA\uff0c\u82e5\u2220A\uff1d22\u00b0\uff0c\u2220CGF\uff1d88\u00b0\uff0c\u5219\u2220E\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 26\u00b0\n(B) 28\u00b0\n(C) 30\u00b0\n(D) 34\u00b0", + "choices": [ + "26\u00b0", + "28\u00b0", + "30\u00b0", + "34\u00b0" + ], + "answer": "26\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 89, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "568": { + "question_id": "568", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an economics project, Colleen determined the cost of ferry rides for bicycles and cars. How much higher is the fare for a car on the Mukilteu-Clinton ferry than on the Southport-Fort Fisher ferry? (Unit: $)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 349, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "570": { + "question_id": "570", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte blocks. Subtract all brown things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "572": { + "question_id": "572", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function start decreasing?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 316, + "img_width": 400, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "574": { + "question_id": "574", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do you see the figures inside these boxes? They form a pattern. Choose the figure in the answer row below that continues the pattern.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5", + "choices": [ + "1", + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 378, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "576": { + "question_id": "576", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human brain is the largest and most anterior part of each cerebral hemisphere?\nChoices:\n(A) motor cortex\n(B) occipital lobe\n(C) temporal lobe\n(D) frontal lobe", + "choices": [ + "motor cortex", + "occipital lobe", + "temporal lobe", + "frontal lobe" + ], + "answer": "frontal lobe", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "motor cortex", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 625, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "578": { + "question_id": "578", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9567", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "580": { + "question_id": "580", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 650, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "582": { + "question_id": "582", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Rebecca Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "584": { + "question_id": "584", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A philanthropic organization compared the amounts of money that its members donated to certain causes. Who donated more money to arts education, Aubrey or Connor?'\nChoices:\n(A) Connor\n(B) Aubrey", + "choices": [ + "Connor", + "Aubrey" + ], + "answer": "Connor", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Connor", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 391, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "586": { + "question_id": "586", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, rotate \u25b3ABC clockwise around point A by 90.0 to obtain \u25b3AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. If \u2220CC\u2032B\u2032 = 32.0, then the size of \u2220AC\u2032B\u2032 is ()\nChoices:\n(A) 32\u00b0\n(B) 45\u00b0\n(C) 13\u00b0\n(D) 30\u00b0", + "choices": [ + "32\u00b0", + "45\u00b0", + "13\u00b0", + "30\u00b0" + ], + "answer": "13\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 80, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "588": { + "question_id": "588", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has more actual total income?", + "choices": null, + "answer": "1982", + "extraction": "1970", + "prediction": "1970", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2281, + "img_width": 1785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "590": { + "question_id": "590", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 264, + "img_width": 376, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "592": { + "question_id": "592", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the global maximum of this function?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 318, + "img_width": 283, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "594": { + "question_id": "594", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the expenditure per student in Jamaica have the greatest increase?", + "choices": null, + "answer": "2005", + "extraction": "2005", + "prediction": "2005", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "596": { + "question_id": "596", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 407, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "598": { + "question_id": "598", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most curved beak species?\nChoices:\n(A) iiki\n(B) swallow-tanager\n(C) cliff swallow\n(D) hawfinch", + "choices": [ + "iiki", + "swallow-tanager", + "cliff swallow", + "hawfinch" + ], + "answer": "iiki", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "iiki", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 463, + "img_width": 593, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "600": { + "question_id": "600", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 637, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "602": { + "question_id": "602", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Rectangle ABCD is subdivided into two identical square regions, as in the figure above. If the area of each square is 9, what is the perimeter of ABCD?", + "choices": null, + "answer": "18", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 219, + "img_width": 435, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "604": { + "question_id": "604", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "606": { + "question_id": "606", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 209, + "img_width": 335, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "608": { + "question_id": "608", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does South Carolina have the highest value in the South ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "610": { + "question_id": "610", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, P, Q, and R lie on the same line. P is the center of the larger circle, and Q is the center of the smaller circle. If the radius of the larger circle is 4, what is the radius of the smaller circle?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 8\n(E) 16", + "choices": [ + "1", + "2", + "4", + "8", + "16" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 411, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "612": { + "question_id": "612", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal things. Subtract all tiny objects. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "614": { + "question_id": "614", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 661, + "img_width": 915, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "616": { + "question_id": "616", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of instagram to google?", + "choices": null, + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "618": { + "question_id": "618", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "620": { + "question_id": "620", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "622": { + "question_id": "622", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cD\u662fBC\u4e0a\u7684\u70b9\uff0c\u4e14BD\uff1d2\uff0cDC\uff1d1\uff0cS\u25b3ACD\uff1d12\uff0c\u90a3\u4e48S\u25b3ABC\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 30\n(B) 36\n(C) 72\n(D) 24", + "choices": [ + "30", + "36", + "72", + "24" + ], + "answer": "36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 146, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "624": { + "question_id": "624", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the total unemployed labor force in Upper middle income greater than 1.6 %?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1344, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "626": { + "question_id": "626", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown objects. Subtract all large purple cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "628": { + "question_id": "628", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u2220ABC\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9E\uff0c\u2220BCD\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9F\uff0c\u82e5AB\uff1d3\uff0cAD\uff1d4\uff0c\u5219EF\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.5\n(D) 3", + "choices": [ + "1", + "2", + "2.5", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "630": { + "question_id": "630", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the size of angle MBD in the figure below.", + "choices": null, + "answer": "72", + "extraction": "66", + "prediction": "66", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 195, + "img_width": 340, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "632": { + "question_id": "632", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total value of the More bar?", + "choices": null, + "answer": "52", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 350, + "img_width": 309, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "634": { + "question_id": "634", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfAB\uff0cCD\u4ea4\u4e8e\u70b9O\uff0e\u5c04\u7ebfOE\u5e73\u5206\u2220BOC\uff0c\u82e5\u2220AOD\uff1d70\u00b0\uff0c\u5219\u2220AOE\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 110\u00b0\n(C) 135\u00b0\n(D) 145\u00b0", + "choices": [ + "35\u00b0", + "110\u00b0", + "135\u00b0", + "145\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 173, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "636": { + "question_id": "636", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "34", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 92, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "638": { + "question_id": "638", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the under-5 male mortality rate greater than the average under-5 male mortality rate taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 880, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "640": { + "question_id": "640", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $\\widehat{\\mathrm{WN}}$ if $\\triangle \\mathrm{IWN}$ is equilateral and $W N=5$\nChoices:\n(A) \\frac { 3 } { 5 } \\pi\n(B) \\frac { 5 } { 3 } \\pi\n(C) 5 \\pi\n(D) 10 \\pi", + "choices": [ + "\\frac { 3 } { 5 } \\pi", + "\\frac { 5 } { 3 } \\pi", + "5 \\pi", + "10 \\pi" + ], + "answer": "\\frac { 5 } { 3 } \\pi", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 3 } { 5 } \\pi", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 222, + "img_width": 309, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "642": { + "question_id": "642", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Line AB is tangent to circle O. If AB = 8 and OB = 10, find the diameter of the circle.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10\n(E) 12", + "choices": [ + "4", + "6", + "8", + "10", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 443, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "644": { + "question_id": "644", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing number in the picture?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 11", + "choices": [ + "6", + "8", + "10", + "11" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 452, + "img_width": 494, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "646": { + "question_id": "646", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The employee at the department store counted the number of ties on each tie rack. How many racks have at least 0 ties? (Unit: racks)", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "648": { + "question_id": "648", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the minimum value of this function?", + "choices": null, + "answer": "-1", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "650": { + "question_id": "650", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of maximum employment rate and minimum employment?", + "choices": null, + "answer": "31.3", + "extraction": "18.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "652": { + "question_id": "652", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 365, + "img_width": 845, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "654": { + "question_id": "654", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metallic motorbikes that are in front of the small brown metal dirtbike than big yellow dirtbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "656": { + "question_id": "656", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 776, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "658": { + "question_id": "658", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 115, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "660": { + "question_id": "660", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small fighters than yellow matte tandem bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "662": { + "question_id": "662", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "80", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "664": { + "question_id": "664", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number of responses for Question 10, for any given % of inside sales?", + "choices": null, + "answer": "17", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2245, + "img_width": 1692, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "666": { + "question_id": "666", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red objects. Subtract all big green things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "668": { + "question_id": "668", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "670": { + "question_id": "670", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which date of Meeting was the most number of shares transferred?\nChoices:\n(A) 04/06/2005\n(B) 04/02/2005\n(C) 04/05/2005\n(D) 04/03/2005\n(E) 04/04/2005", + "choices": [ + "04/06/2005", + "04/02/2005", + "04/05/2005", + "04/03/2005", + "04/04/2005" + ], + "answer": "04/02/2005", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "04/06/2005", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2135, + "img_width": 1582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "672": { + "question_id": "672", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 169, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "674": { + "question_id": "674", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, CDE is an equilateral triangle and ABCE is a square with an area of 1. What is the perimeter of polygon ABCDE?\nChoices:\n(A) 4\n(B) 5\n(C) 6\n(D) 7\n(E) 8", + "choices": [ + "4", + "5", + "6", + "7", + "8" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "676": { + "question_id": "676", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "678": { + "question_id": "678", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 21\n(B) 34\n(C) 58\n(D) 67", + "choices": [ + "21", + "34", + "58", + "67" + ], + "answer": "58", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 267, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "680": { + "question_id": "680", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 303, + "img_width": 440, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "682": { + "question_id": "682", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the grass dies then population of squirrel will\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remains the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 592, + "img_width": 864, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "684": { + "question_id": "684", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{CH} \\cong \\overline{KJ}$. Find $x$.\nChoices:\n(A) 27\n(B) 54\n(C) 55\n(D) 83", + "choices": [ + "27", + "54", + "55", + "83" + ], + "answer": "55", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "27", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 444, + "img_width": 608, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "686": { + "question_id": "686", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function invertible?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 442, + "img_width": 731, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "688": { + "question_id": "688", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the minimum age group shown in the \u2018plots\u2019?\nChoices:\n(A) 11-15\n(B) 21-25\n(C) 6-10\n(D) 16-20\n(E) 0-5", + "choices": [ + "11-15", + "21-25", + "6-10", + "16-20", + "0-5" + ], + "answer": "0-5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "11-15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2136, + "img_width": 3160, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "690": { + "question_id": "690", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, lines M and N are parallel. All of the following are true except\nChoices:\n(A) a + b = j + l\n(B) g = h\n(C) c + f = f + b\n(D) g + e + f + h = 360\n(E) d + e = f + j", + "choices": [ + "a + b = j + l", + "g = h", + "c + f = f + b", + "g + e + f + h = 360", + "d + e = f + j" + ], + "answer": "d + e = f + j", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a + b = j + l", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 558, + "img_width": 625, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "692": { + "question_id": "692", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain if grasses dried up in summer, what is likely to happen?\nChoices:\n(A) Grasshoppers will decrease.\n(B) shrews will become extinct\n(C) owls will increase.\n(D) None of the above", + "choices": [ + "Grasshoppers will decrease.", + "shrews will become extinct", + "owls will increase.", + "None of the above" + ], + "answer": "Grasshoppers will decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshoppers will decrease.", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 189, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "694": { + "question_id": "694", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u83f1\u5f62ABCD\u4e2d\uff0cM\u3001N\u5206\u522b\u662fBC\u548cCD\u7684\u4e2d\u70b9\uff0cNP\u22a5AB\u4e8e\u70b9P\uff0c\u8fde\u63a5MP\uff0e\u82e5\u2220DAB\uff1d40\u00b0\uff0c\u5219\u2220MPB\uff1d\uff08\uff09\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 115\u00b0\n(D) 110\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "115\u00b0", + "110\u00b0" + ], + "answer": "110\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 158, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "696": { + "question_id": "696", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Erica has $1,525.00. Does she have enough to buy a motorcycle and a canoe?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 214, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "698": { + "question_id": "698", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle in the figure above, what is the value of x?\nChoices:\n(A) 2*\\sqrt{3}\n(B) 6*\\sqrt{2}\n(C) 6*\\sqrt{3}\n(D) 6\n(E) 12", + "choices": [ + "2*\\sqrt{3}", + "6*\\sqrt{2}", + "6*\\sqrt{3}", + "6", + "12" + ], + "answer": "2*\\sqrt{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2*\\sqrt{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 376, + "img_width": 615, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "700": { + "question_id": "700", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u2299O\u662f\u25b3ABC\u7684\u5916\u63a5\u5706\uff0cAB\uff1dBC\uff1d4\uff0c\u628a\u5f27AB\u6cbf\u5f26AB\u5411\u4e0b\u6298\u53e0\u4ea4BC\u4e8e\u70b9D\uff0c\u82e5\u70b9D\u4e3aBC\u4e2d\u70b9\uff0c\u5219AC\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2\u221a{2}\n(D) \u221a{6}", + "choices": [ + "1", + "2", + "2\u221a{2}", + "\u221a{6}" + ], + "answer": "2\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "702": { + "question_id": "702", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP A\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "400", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "704": { + "question_id": "704", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two puzzle pieces form the larger square?\nChoices:\n(A) 1 & 2\n(B) 1 & 3\n(C) 1 & 4\n(D) 2 & 3\n(E) 2 & 4", + "choices": [ + "1 & 2", + "1 & 3", + "1 & 4", + "2 & 3", + "2 & 4" + ], + "answer": "1 & 3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1 & 2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 440, + "img_width": 396, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "706": { + "question_id": "706", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the image of the dot (8,-2) under a clockwise rotation by 270\u00b0 about the origin.\"\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 432, + "img_width": 438, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "708": { + "question_id": "708", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB \u2225 CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().\nChoices:\n(A) \\frac{6}{5}\n(B) \\frac{7}{6}\n(C) \\frac{9}{5}\n(D) \\frac{15}{2}", + "choices": [ + "\\frac{6}{5}", + "\\frac{7}{6}", + "\\frac{9}{5}", + "\\frac{15}{2}" + ], + "answer": "\\frac{9}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{6}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 156, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "710": { + "question_id": "710", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1555, + "img_width": 2293, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "712": { + "question_id": "712", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 244, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "714": { + "question_id": "714", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large brown rubber motorbikes in front of the big motorbike greater than the number of big green sedans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "716": { + "question_id": "716", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 16 \\sqrt { 2 }\n(B) 16 \\sqrt { 3 }\n(C) 32\n(D) 16 \\sqrt { 5 }", + "choices": [ + "16 \\sqrt { 2 }", + "16 \\sqrt { 3 }", + "32", + "16 \\sqrt { 5 }" + ], + "answer": "16 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16 \\sqrt { 2 }", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 196, + "img_width": 427, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "718": { + "question_id": "718", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Jeffrey is the proud owner of an eclectic bow tie collection. He keeps track of how many bow ties he has, and organizes them by pattern and material. What is the probability that a randomly selected bow tie is designed with swirls and is made of velvet? Simplify any fractions.'", + "choices": null, + "answer": "0.21", + "extraction": "0.33", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 94, + "img_width": 215, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "720": { + "question_id": "720", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function value first reach 2?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 362, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "722": { + "question_id": "722", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "724": { + "question_id": "724", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rebecca Purple have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 638, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "726": { + "question_id": "726", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 10\n(B) 30\n(C) 90\n(D) 120", + "choices": [ + "10", + "30", + "90", + "120" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 228, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "728": { + "question_id": "728", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 69, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "730": { + "question_id": "730", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year the market share of KLA is highest?", + "choices": null, + "answer": "2019", + "extraction": "2013", + "prediction": "2013", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "732": { + "question_id": "732", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism would be most affected if there was a shortage of plants?\nChoices:\n(A) Grasshopper\n(B) Snake\n(C) Mouse\n(D) Hawk", + "choices": [ + "Grasshopper", + "Snake", + "Mouse", + "Hawk" + ], + "answer": "Grasshopper", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshopper", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "734": { + "question_id": "734", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer double buss that are behind the aeroplane than things on the left side of the yellow double bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "736": { + "question_id": "736", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u76f4\u7ebfa\u2225b\uff0c\u76f4\u89d2\u4e09\u89d2\u5f62ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5\u2220B\uff1d58\u00b0\uff0c\u90a3\u4e48\u22201\ufe63\u22202\uff1d\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 58\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "58\u00b0" + ], + "answer": "32\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 154, + "img_width": 226, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "738": { + "question_id": "738", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 268, + "img_width": 383, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "740": { + "question_id": "740", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What percent of the stands are full?\nChoices:\n(A) 15\n(B) 100\n(C) 50\n(D) 50", + "choices": [ + "15", + "100", + "50", + "50" + ], + "answer": "15", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "742": { + "question_id": "742", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 159, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "744": { + "question_id": "744", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If RL = 5, RT = 9, and WS = 6, find RW.\nChoices:\n(A) 5.4\n(B) 6\n(C) 6.6\n(D) 7.5", + "choices": [ + "5.4", + "6", + "6.6", + "7.5" + ], + "answer": "7.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 404, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "746": { + "question_id": "746", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mrs. Zimmerman hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between 2013 and 2014? (Unit: entries per year)", + "choices": null, + "answer": "7", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 199, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "748": { + "question_id": "748", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangents of \u2299O, the tangent point of point A and B, AC is the diameter of \u2299O, given that \u2220P = 50.0, then the size of \u2220ACB is ()\nChoices:\n(A) 65\u00b0\n(B) 60\u00b0\n(C) 55\u00b0\n(D) 50\u00b0", + "choices": [ + "65\u00b0", + "60\u00b0", + "55\u00b0", + "50\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 207, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "750": { + "question_id": "750", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 356, + "img_width": 290, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "752": { + "question_id": "752", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cPA\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5207\u70b9\u4e3aA\uff0cOP\uff1d4\uff0c\u2220APO\uff1d30\u00b0\uff0c\u5219\u2299O\u7684\u534a\u5f84\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 3", + "choices": [ + "1", + "\u221a{3}", + "2", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 122, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "754": { + "question_id": "754", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. What will happen to fish population if algae's are decreased?\nChoices:\n(A) Population will decrease\n(B) Population will remain the same\n(C) Population will increase\n(D) None of the above", + "choices": [ + "Population will decrease", + "Population will remain the same", + "Population will increase", + "None of the above" + ], + "answer": "Population will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Population will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 364, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "756": { + "question_id": "756", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the trees died, the population of porcupine would most likely\nChoices:\n(A) double\n(B) skyrocket\n(C) decrease\n(D) increase", + "choices": [ + "double", + "skyrocket", + "decrease", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "double", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 591, + "img_width": 765, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "758": { + "question_id": "758", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny purple trucks behind the small matte motorbike less than the number of fighters that are behind the big metal utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "760": { + "question_id": "760", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow tandem bikes less than the number of big objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "762": { + "question_id": "762", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of symmetry of this function?\nChoices:\n(A) (0, 0)\n(B) (-1, 0)\n(C) (2, 0)", + "choices": [ + "(0, 0)", + "(-1, 0)", + "(2, 0)" + ], + "answer": "(0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "764": { + "question_id": "764", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of bananas on each stock?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 349, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "766": { + "question_id": "766", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red trucks than small blue bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "768": { + "question_id": "768", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the hottest on average in Rome?\nChoices:\n(A) December, January, and February\n(B) July and August\n(C) March and April", + "choices": [ + "December, January, and February", + "July and August", + "March and April" + ], + "answer": "July and August", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "December, January, and February", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 448, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "770": { + "question_id": "770", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the amplitude of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "772": { + "question_id": "772", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow shiny motorbikes greater than the number of red rubber fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "774": { + "question_id": "774", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large matte utility bikes than small yellow bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "776": { + "question_id": "776", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $JQ$ if $Q$ is the incenter of $\\triangle JLN$. Rounded to the nearest hundredth.\nChoices:\n(A) 16.50\n(B) 18.79\n(C) 20.32\n(D) 25.50", + "choices": [ + "16.50", + "18.79", + "20.32", + "25.50" + ], + "answer": "18.79", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16.50", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 424, + "img_width": 589, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "778": { + "question_id": "778", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you find the missing shape in this picture puzzle?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 431, + "img_width": 797, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "780": { + "question_id": "780", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 209, + "img_width": 848, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "782": { + "question_id": "782", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "4", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 376, + "img_width": 384, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "784": { + "question_id": "784", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Across all years, what is the maximum rating of statistical capacity in Maldives ?", + "choices": null, + "answer": "70", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 938, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "786": { + "question_id": "786", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle K$\nChoices:\n(A) 6\n(B) 60\n(C) 100\n(D) 180", + "choices": [ + "6", + "60", + "100", + "180" + ], + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 317, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "788": { + "question_id": "788", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 332, + "img_width": 515, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "790": { + "question_id": "790", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cN\u662fBC\u8fb9\u4e0a\u7684\u4e2d\u70b9\uff0cAM\u5e73\u5206\u2220BAC\uff0cBM\u22a5AM\u4e8e\u70b9M\uff0c\u82e5AB\uff1d8\uff0cMN\uff1d2\uff0e\u5219AC\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 145, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "792": { + "question_id": "792", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2624, + "img_width": 3936, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "794": { + "question_id": "794", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 4?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "796": { + "question_id": "796", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1938, + "img_width": 2516, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "798": { + "question_id": "798", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, l || m. Which of the following must equal 180?\nChoices:\n(A) k + n + r\n(B) k + p + s\n(C) n + p + s\n(D) n + p + t\n(E) r + s + t", + "choices": [ + "k + n + r", + "k + p + s", + "n + p + s", + "n + p + t", + "r + s + t" + ], + "answer": "k + p + s", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "k + n + r", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 372, + "img_width": 371, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "800": { + "question_id": "800", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid intersect Forest Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 596, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "802": { + "question_id": "802", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Karen bought 4 pounds of silk scraps and 4 pounds of canvas scraps. How much did she spend? (Unit: $)", + "choices": null, + "answer": "69", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 194, + "img_width": 243, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "804": { + "question_id": "804", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot B$, $CE=13.5$. Find $BD$. Round to the nearest hundredth.\nChoices:\n(A) 3.71\n(B) 4.29\n(C) 4.53\n(D) 6.75", + "choices": [ + "3.71", + "4.29", + "4.53", + "6.75" + ], + "answer": "4.29", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.71", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 524, + "img_width": 493, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "806": { + "question_id": "806", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and point C is on \u2299O. If \u2220A = 40.0, then the degree of \u2220B is ()\nChoices:\n(A) 80\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 40\u00b0", + "choices": [ + "80\u00b0", + "60\u00b0", + "50\u00b0", + "40\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "808": { + "question_id": "808", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large purple spheres. Subtract all small gray things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "810": { + "question_id": "810", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow metallic balls. Subtract all small yellow shiny things. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "812": { + "question_id": "812", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the gray bar always have smaller value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 1286, + "img_width": 840, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "814": { + "question_id": "814", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "100000000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "816": { + "question_id": "816", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth, if necessary.\nChoices:\n(A) 3\n(B) 9\n(C) 12.25\n(D) 24", + "choices": [ + "3", + "9", + "12.25", + "24" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 272, + "img_width": 379, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "818": { + "question_id": "818", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of least value of light brown graph and leftmost value of dark brown graph?", + "choices": null, + "answer": "0.32", + "extraction": "0.07", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 434, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "820": { + "question_id": "820", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $a=14, b=48,$ and $c=50$ find $cosA$\nChoices:\n(A) 0.14\n(B) 0.48\n(C) 0.50\n(D) 0.96", + "choices": [ + "0.14", + "0.48", + "0.50", + "0.96" + ], + "answer": "0.96", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "822": { + "question_id": "822", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 22\n(B) 40\n(C) 44\n(D) 48", + "choices": [ + "22", + "40", + "44", + "48" + ], + "answer": "44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "22", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 356, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "824": { + "question_id": "824", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)", + "choices": null, + "answer": "0.13", + "extraction": "0.97", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "826": { + "question_id": "826", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the largest part of the lung?\nChoices:\n(A) Inferior lobes\n(B) Cardiac notch\n(C) Superior lobes\n(D) Middle lobe", + "choices": [ + "Inferior lobes", + "Cardiac notch", + "Superior lobes", + "Middle lobe" + ], + "answer": "Superior lobes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Inferior lobes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 479, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "828": { + "question_id": "828", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Linda wants to buy 0.9 pounds of double chocolate cookie dough. How much will she spend? (Unit: $)", + "choices": null, + "answer": "2.7", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 357, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "830": { + "question_id": "830", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 870, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "832": { + "question_id": "832", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "-2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "834": { + "question_id": "834", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Georgia , does Florida have the lowest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 610, + "img_width": 785, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "836": { + "question_id": "836", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest species shown?\nChoices:\n(A) chinlea\n(B) arganodus\n(C) semionotus\n(D) xenacanthus", + "choices": [ + "chinlea", + "arganodus", + "semionotus", + "xenacanthus" + ], + "answer": "semionotus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "chinlea", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1076, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "838": { + "question_id": "838", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1200, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "840": { + "question_id": "840", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which item can you get the most protein?\nChoices:\n(A) salami\n(B) wine\n(C) cheese\n(D) bread", + "choices": [ + "salami", + "wine", + "cheese", + "bread" + ], + "answer": "salami", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "salami", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 375, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "842": { + "question_id": "842", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $\\frac{2.0}{3.0}$hours to reach point B, then tan\u2220BAP = ()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{6}{5}\n(C) \\frac{\u221a{5}}{5}\n(D) \\frac{2\u221a{5}}{5}", + "choices": [ + "\\frac{4}{5}", + "\\frac{6}{5}", + "\\frac{\u221a{5}}{5}", + "\\frac{2\u221a{5}}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{5}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 115, + "img_width": 154, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "844": { + "question_id": "844", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the larger window shaped like the smaller window?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "846": { + "question_id": "846", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 758, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "848": { + "question_id": "848", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the tuberculosis treatment success rate in Bulgaria greater than the average tuberculosis treatment success rate in Bulgaria taken over all years ?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1091, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "850": { + "question_id": "850", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of cars in front of the tiny metal thing less than the number of large matte things in front of the cyan rubber road bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "852": { + "question_id": "852", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "40", + "extraction": "19", + "prediction": "19", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 598, + "img_width": 612, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "854": { + "question_id": "854", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the pelicans in the community were eradicated, which population feel the most direct effect?\nChoices:\n(A) Plant\n(B) Phyto-plankton\n(C) Fish\n(D) Lizard", + "choices": [ + "Plant", + "Phyto-plankton", + "Fish", + "Lizard" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "856": { + "question_id": "856", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture has the least leaves?\nChoices:\n(A) Both\n(B) Compound\n(C) Simple\n(D) Neither", + "choices": [ + "Both", + "Compound", + "Simple", + "Neither" + ], + "answer": "Simple", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Both", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "858": { + "question_id": "858", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the basis of the given food web, which organism will increase in number if there were no seals?\nChoices:\n(A) Shark\n(B) Small Shrimp\n(C) Octopus\n(D) Mysid Shrimp", + "choices": [ + "Shark", + "Small Shrimp", + "Octopus", + "Mysid Shrimp" + ], + "answer": "Octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Shark", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "860": { + "question_id": "860", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Miss Foley ran a sit-up competition among her P.E. students and monitored how many sit-ups each students could do. What is the largest number of sit-ups done? (Unit: sit-ups)", + "choices": null, + "answer": "86", + "extraction": "256", + "prediction": "256", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 246, + "img_width": 291, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "862": { + "question_id": "862", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: One of the most dramatic videos on the web (but entirely fictitious) supposedly shows a man sliding along a long water slide and then being launched into the air to land in a water pool. Let's attach some reasonable numbers to such a flight to calculate the velocity with which the man would have hit the water. Figure indicates the launch and landing sites and includes a superimposed coordinate system with its origin conveniently located at the launch site. From the video we take the horizontal flight distance as $D=20.0 \\mathrm{~m}$, the flight time as $t=2.50 \\mathrm{~s}$, and the launch angle as $\\theta_0=40.0^{\\circ}$. Find the magnitude of the velocity at launch and at landing.", + "choices": null, + "answer": "10.44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 600, + "img_width": 1302, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "864": { + "question_id": "864", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1738, + "img_width": 2480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "866": { + "question_id": "866", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For trapezoid $Q R S T, A$ and $B$ are midpoints of the legs. Find $m \\angle S$\nChoices:\n(A) 45\n(B) 60\n(C) 120\n(D) 135", + "choices": [ + "45", + "60", + "120", + "135" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 169, + "img_width": 359, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "868": { + "question_id": "868", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green cylinders. Subtract all rubber cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "870": { + "question_id": "870", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny motorbikes in front of the small cyan tandem bike than big cyan metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "872": { + "question_id": "872", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the next shape.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "D", + "extraction": "C", + "prediction": "C", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 496, + "img_width": 1472, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "874": { + "question_id": "874", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of y at x=-2.5?", + "choices": null, + "answer": "2", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "876": { + "question_id": "876", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, square $ABDC$ is inscribed in $\\odot K$. Find the measure of a central angle.\nChoices:\n(A) 45\n(B) 60\n(C) 90\n(D) 180", + "choices": [ + "45", + "60", + "90", + "180" + ], + "answer": "90", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 275, + "img_width": 273, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "878": { + "question_id": "878", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u9762\u79ef\u5206\u522b\u4e3aS1\uff0cS2\uff0cS3\uff0c\u4e14S1\uff1d5\uff0cS3\uff1d16\uff0c\u5219S2\uff1d\uff08\uff09\nChoices:\n(A) 6\n(B) 2\u221a{2}\n(C) 11\n(D) 24", + "choices": [ + "6", + "2\u221a{2}", + "11", + "24" + ], + "answer": "11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 94, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "880": { + "question_id": "880", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the total add up value of largest and smallest bar?", + "choices": null, + "answer": "252.65", + "extraction": "12.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "882": { + "question_id": "882", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "884": { + "question_id": "884", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue kite in the lower right corner shaped like?\nChoices:\n(A) ferret\n(B) cat\n(C) cloud\n(D) octopus", + "choices": [ + "ferret", + "cat", + "cloud", + "octopus" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ferret", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "886": { + "question_id": "886", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A newspaper researched how many grocery stores there are in each town. What is the median of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "888": { + "question_id": "888", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green shiny balls. Subtract all small metallic things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "890": { + "question_id": "890", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is larger the moon or the sun?\nChoices:\n(A) Sun\n(B) It varies\n(C) They are equal in size\n(D) Moon", + "choices": [ + "Sun", + "It varies", + "They are equal in size", + "Moon" + ], + "answer": "Sun", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Sun", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 844, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "892": { + "question_id": "892", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does New Jersey have a higher value than Georgia ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "894": { + "question_id": "894", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms fat and acre?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "896": { + "question_id": "896", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately, what percentage of jewelry sales in January were Rings?\nChoices:\n(A) Around 21%\n(B) Around 27%\n(C) Around 31%\n(D) Around 37%", + "choices": [ + "Around 21%", + "Around 27%", + "Around 31%", + "Around 37%" + ], + "answer": "Around 31%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Around 21%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "elementary school", + "img_height": 464, + "img_width": 758, + "language": "english", + "skills": [ + "logical reasoning", + "statistical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "898": { + "question_id": "898", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, and C are the three points on \u2299O, if \u2220C = 35.0, then the degree of \u2220OAB is ()\nChoices:\n(A) 35\u00b0\n(B) 55\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "35\u00b0", + "55\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 109, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "900": { + "question_id": "900", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rubber cars less than the number of brown jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "902": { + "question_id": "902", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the leaf base has an angle greater than 90 degrees, what is it called?\nChoices:\n(A) obtuse\n(B) decurrent\n(C) cuneate\n(D) acute", + "choices": [ + "obtuse", + "decurrent", + "cuneate", + "acute" + ], + "answer": "obtuse", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "obtuse", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1429, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "904": { + "question_id": "904", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "906": { + "question_id": "906", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two value is greater then then largest value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "908": { + "question_id": "908", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which organism would most likely have a decrease in its population if decrease the population of ant base of above diagram?\nChoices:\n(A) plant\n(B) human\n(C) lizard\n(D) snake", + "choices": [ + "plant", + "human", + "lizard", + "snake" + ], + "answer": "lizard", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 497, + "img_width": 312, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "910": { + "question_id": "910", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal balls. Subtract all large matte things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "912": { + "question_id": "912", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 413, + "img_width": 629, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "914": { + "question_id": "914", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny purple shiny cubes. Subtract all large purple balls. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "916": { + "question_id": "916", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, \u2220A = 30.0, BC = 2.0, the radius of \u2299C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of \u2299C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()\nChoices:\n(A) 2\n(B) \u221a{3}\n(C) \u221a{2}\n(D) 2-\\frac{\u221a{3}}{3}", + "choices": [ + "2", + "\u221a{3}", + "\u221a{2}", + "2-\\frac{\u221a{3}}{3}" + ], + "answer": "\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 145, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "918": { + "question_id": "918", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "1", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 492, + "img_width": 538, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "920": { + "question_id": "920", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The measure of angle BAC equals x*\\degree. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 388, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "922": { + "question_id": "922", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual element in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "924": { + "question_id": "924", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Periwinkle have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "926": { + "question_id": "926", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the shaded area under the curve? Round the answer to 2 decimal places", + "choices": null, + "answer": "7.07", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "928": { + "question_id": "928", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more does a navy blue bath mat cost than a yellow bath towel? (Unit: $)", + "choices": null, + "answer": "5", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 234, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "930": { + "question_id": "930", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cF\u662f\u25b3ABC\u7684\u89d2\u5e73\u5206\u7ebfCD\u548cBE\u7684\u4ea4\u70b9\uff0cCG\u22a5AB\u4e8e\u70b9G\uff0e\u82e5\u2220ACG\uff1d32\u00b0\uff0c\u5219\u2220BFC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 119\u00b0\n(B) 122\u00b0\n(C) 148\u00b0\n(D) 150\u00b0", + "choices": [ + "119\u00b0", + "122\u00b0", + "148\u00b0", + "150\u00b0" + ], + "answer": "119\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "119\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 113, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "932": { + "question_id": "932", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the phytoplankton if krill increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't be predicted\n(D) stay the same", + "choices": [ + "decrease", + "increase", + "can't be predicted", + "stay the same" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 350, + "img_width": 750, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "934": { + "question_id": "934", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "10000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "936": { + "question_id": "936", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 892, + "img_width": 710, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "938": { + "question_id": "938", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m \u22209 = 75$. Find the measure of $\\angle 6$.\nChoices:\n(A) 75\n(B) 85\n(C) 95\n(D) 105", + "choices": [ + "75", + "85", + "95", + "105" + ], + "answer": "105", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 278, + "img_width": 417, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "940": { + "question_id": "940", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red things. Subtract all metallic things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "942": { + "question_id": "942", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "0", + "extraction": "-10", + "prediction": "-10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "944": { + "question_id": "944", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "946": { + "question_id": "946", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 373, + "img_width": 560, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "948": { + "question_id": "948", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some students compared how many blocks they live from school. What is the mean of the numbers?'", + "choices": null, + "answer": "11", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 207, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "950": { + "question_id": "950", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The slope of f(x) at x=0 is ____\nChoices:\n(A) positive\n(B) negative\n(C) zero\n(D) undefined", + "choices": [ + "positive", + "negative", + "zero", + "undefined" + ], + "answer": "positive", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "positive", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "952": { + "question_id": "952", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the food web below and on your knowledge of biology. A decrease in the Aquatic crustaceans population will most immediately decrease the available energy for the\nChoices:\n(A) Minnows\n(B) Ducks\n(C) Fish\n(D) Raccoons", + "choices": [ + "Minnows", + "Ducks", + "Fish", + "Raccoons" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Minnows", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "954": { + "question_id": "954", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A partial food web is shown below. Which of the following will most likely happen if the snake population decreases?\nChoices:\n(A) Cricket will increase\n(B) Mouse will increase\n(C) Rabbit will increase\n(D) All of above", + "choices": [ + "Cricket will increase", + "Mouse will increase", + "Rabbit will increase", + "All of above" + ], + "answer": "All of above", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Cricket will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 277, + "img_width": 475, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "956": { + "question_id": "956", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small blue rubber objects. Subtract all brown shiny balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "958": { + "question_id": "958", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the missing letters from below to form a word, using all letters presented\nChoices:\n(A) A, R, N\n(B) R, D, N\n(C) I, A, M\n(D) H, O, W", + "choices": [ + "A, R, N", + "R, D, N", + "I, A, M", + "H, O, W" + ], + "answer": "R, D, N", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "A, R, N", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 773, + "img_width": 945, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "960": { + "question_id": "960", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1365, + "img_width": 2048, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "962": { + "question_id": "962", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of y at x=10 is ____ that at x=70.\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 301, + "img_width": 387, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "964": { + "question_id": "964", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 70, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "966": { + "question_id": "966", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 166, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "968": { + "question_id": "968", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue balls. Subtract all big yellow rubber balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "970": { + "question_id": "970", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e24\u76f4\u7ebfa\uff0cb\u88ab\u76f4\u7ebfc\u6240\u622a\uff0c\u5df2\u77e5a\u2225b\uff0c\u22201\uff1d62\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 62\u00b0\n(B) 108\u00b0\n(C) 118\u00b0\n(D) 128\u00b0", + "choices": [ + "62\u00b0", + "108\u00b0", + "118\u00b0", + "128\u00b0" + ], + "answer": "118\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "62\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 135, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "972": { + "question_id": "972", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow shiny utility bikes greater than the number of brown metallic cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "974": { + "question_id": "974", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there the same number of big blue trucks and large purple metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "976": { + "question_id": "976", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal biplanes behind the purple shiny object less than the number of purple school buss behind the big red object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "978": { + "question_id": "978", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Allie kept a written log of how many miles she biked during the past 7 days. What is the range of the numbers?'", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "980": { + "question_id": "980", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number shown?", + "choices": null, + "answer": "12", + "extraction": "12", + "prediction": "12", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 429, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "982": { + "question_id": "982", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Wyoming , does South Dakota have the highest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "984": { + "question_id": "984", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray cars less than the number of small metallic minivans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "986": { + "question_id": "986", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAD\u662f\u89d2\u5e73\u5206\u7ebf\uff0cAE\u662f\u9ad8\uff0e\u82e5\u2220B\uff1d40\u00b0\uff0c\u2220C\uff1d70\u00b0\uff0c\u5219\u2220EAD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 10\u00b0\n(B) 15\u00b0\n(C) 17.5\u00b0\n(D) 20\u00b0", + "choices": [ + "10\u00b0", + "15\u00b0", + "17.5\u00b0", + "20\u00b0" + ], + "answer": "15\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 101, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "988": { + "question_id": "988", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "990": { + "question_id": "990", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot S$, $m \\widehat {PQR}=98$, Find $m \\widehat {PQ}$.\nChoices:\n(A) 45\n(B) 49\n(C) 90\n(D) 98", + "choices": [ + "45", + "49", + "90", + "98" + ], + "answer": "49", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 452, + "img_width": 544, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "992": { + "question_id": "992", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of purple metallic things that are behind the small green motorbike less than the number of blue metal articulated buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "994": { + "question_id": "994", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta greater than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 548, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "996": { + "question_id": "996", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big shiny balls. Subtract all blue rubber blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "998": { + "question_id": "998", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff1a\u2220AOB\uff1a\u2220BOC\uff1a\u2220COD\uff1d2\uff1a3\uff1a4\uff0c\u5c04\u7ebfOM\u3001ON\uff0c\u5206\u522b\u5e73\u5206\u2220AOB\u4e0e\u2220COD\uff0c\u53c8\u2220MON\uff1d84\u00b0\uff0c\u5219\u2220AOB\u4e3a\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 38\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "38\u00b0" + ], + "answer": "28\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 181, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "1000": { + "question_id": "1000", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte cylinders. Subtract all big purple matte things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + } +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json new file mode 100644 index 0000000000000000000000000000000000000000..6e31198d95d1cd2043b0adda4df7a45274816f9f --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86378d064486a95db78016ad5425190064d153e0f9021768efc7fa43820edd8c +size 45276045 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json new file mode 100644 index 0000000000000000000000000000000000000000..5bcbe8e9297fa5ce459b58599e64b157068be636 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82efa94480755a72c2a10eda7fb180405a023f189abefceb5ffb7063f342426 +size 94631601 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json new file mode 100644 index 0000000000000000000000000000000000000000..295b5a7c7165f5d6cf410b5dd0a5145fbd488fdb --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c498ee36877cbd7e8a0700b0d8e55d660a8b97813d5104e13a420f69cd4e9b +size 36750667 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json new file mode 100644 index 0000000000000000000000000000000000000000..05a9a5d22985c2b7a0a5f3d00cdbc078c1578255 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5965c6560c55dd4db61a68e735a651305207df081f43f026b6c609641ccd2348 +size 60427706 diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank0_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank0_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c064df42468d805177a80623c54c976c8d760e --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank0_metric_eval_done.txt @@ -0,0 +1 @@ +rank 0 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank1_metric_eval_done.txt b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank1_metric_eval_done.txt new file mode 100644 index 0000000000000000000000000000000000000000..36792c9cedb6c006db3a866d72eac15f0ce6a64a --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank1_metric_eval_done.txt @@ -0,0 +1 @@ +rank 1 eval done \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/results.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/results.json new file mode 100644 index 0000000000000000000000000000000000000000..511576ffbdbbde50fe25e018cf1b4d5d7c67dbcc --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/results.json @@ -0,0 +1,285 @@ +{ + "results": { + "mathvista_testmini": { + "gpt_eval_score,none": 24.0, + "gpt_eval_score_stderr,none": "N/A", + "alias": "mathvista_testmini" + }, + "mme": { + "mme_cognition_score,none": 305.3571428571429, + "mme_cognition_score_stderr,none": "N/A", + "mme_percetion_score,none": 1400.8141256502602, + "mme_percetion_score_stderr,none": "N/A", + "alias": "mme" + }, + "mmmu_val": { + "mmmu_acc,none": 0.42444, + "mmmu_acc_stderr,none": "N/A", + "alias": "mmmu_val" + }, + "mmstar": { + "coarse perception,none": 0.7007139127935286, + "coarse perception_stderr,none": "N/A", + "fine-grained perception,none": 0.36133628101203635, + "fine-grained perception_stderr,none": "N/A", + "instance reasoning,none": 0.5332709723793451, + "instance reasoning_stderr,none": "N/A", + "logical reasoning,none": 0.37952487556447956, + "logical reasoning_stderr,none": "N/A", + "math,none": 0.2904916243428673, + "math_stderr,none": "N/A", + "science & technology,none": 0.2396233390154238, + "science & technology_stderr,none": "N/A", + "alias": "mmstar" + } + }, + "configs": { + "mathvista_testmini": { + "task": "mathvista_testmini", + "dataset_path": "AI4Math/MathVista", + "dataset_kwargs": { + "token": true + }, + "test_split": "testmini", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "gpt_eval_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "ASSISTANT:" + ], + "max_new_tokens": 1024, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "model_specific_prompt_kwargs": { + "default": { + "shot_type": "format-prompt", + "shot": 0, + "use_caption": false, + "use_ocr": false + }, + "phi3v": { + "shot_type": "solution" + } + }, + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mme": { + "task": "mme", + "dataset_path": "lmms-lab/MME", + "dataset_kwargs": { + "token": false + }, + "test_split": "test", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mme_percetion_score", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "mme_cognition_score", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 16, + "temperature": 0.0, + "top_p": 1.0, + "num_beams": 1, + "do_sample": false, + "until": [ + "\n\n" + ] + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question using a single word or phrase." + }, + "gpt4v": { + "pre_prompt": "", + "post_prompt": "\nAnswer the question with Yes or No." + }, + "qwen_vl": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "otterhd": { + "pre_prompt": "", + "post_prompt": " Answer:" + }, + "xcomposer2_4khd": { + "pre_prompt": "[UNUSED_TOKEN_146]user\n", + "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n" + } + } + }, + "mmmu_val": { + "task": "mmmu_val", + "dataset_path": "lmms-lab/MMMU", + "test_split": "validation", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "mmmu_acc", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "max_new_tokens": 128, + "until": [ + "\n\n" + ], + "image_aspect_ratio": "original" + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_generation_kwargs": { + "llava": { + "image_aspect_ratio": "original" + } + } + }, + "mmstar": { + "task": "mmstar", + "dataset_path": "Lin-Chen/MMStar", + "dataset_kwargs": { + "token": true + }, + "test_split": "val", + "doc_to_visual": "", + "doc_to_text": "", + "doc_to_target": "answer", + "process_results": "", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "metric_list": [ + { + "metric": "coarse perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "fine-grained perception", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "instance reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "logical reasoning", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "science & technology", + "aggregation": "", + "higher_is_better": true + }, + { + "metric": "math", + "aggregation": "", + "higher_is_better": true + } + ], + "output_type": "generate_until", + "generation_kwargs": { + "until": [ + "\n\n" + ], + "do_sample": false + }, + "repeats": 1, + "should_decontaminate": false, + "metadata": [ + { + "version": 0.0 + } + ], + "model_specific_prompt_kwargs": { + "default": { + "pre_prompt": "", + "post_prompt": "\nAnswer with the option's letter from the given choices directly" + } + } + } + }, + "versions": { + "mathvista_testmini": "Yaml", + "mme": "Yaml", + "mmmu_val": "Yaml", + "mmstar": "Yaml" + }, + "n-shot": { + "mathvista_testmini": 0, + "mme": 0, + "mmmu_val": 0, + "mmstar": 0 + }, + "model_configs": { + "model": "llava", + "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-20791,conv_template=phi35", + "batch_size": "1", + "device": null, + "limit": null, + "bootstrap_iters": 100000, + "gen_kwargs": "" + }, + "git_hash": "289c7fe5" +} \ No newline at end of file diff --git a/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/submissions/mathvista_testmini_scores.json b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/submissions/mathvista_testmini_scores.json new file mode 100644 index 0000000000000000000000000000000000000000..43f9ae5e2d27522cd841df6818a12d8a0ceef207 --- /dev/null +++ b/sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/submissions/mathvista_testmini_scores.json @@ -0,0 +1,26873 @@ +{ + "1": { + "question_id": "1", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: When a spring does work on an object, we cannot find the work by simply multiplying the spring force by the object's displacement. The reason is that there is no one value for the force-it changes. However, we can split the displacement up into an infinite number of tiny parts and then approximate the force in each as being constant. Integration sums the work done in all those parts. Here we use the generic result of the integration.\r\n\r\nIn Figure, a cumin canister of mass $m=0.40 \\mathrm{~kg}$ slides across a horizontal frictionless counter with speed $v=0.50 \\mathrm{~m} / \\mathrm{s}$. It then runs into and compresses a spring of spring constant $k=750 \\mathrm{~N} / \\mathrm{m}$. When the canister is momentarily stopped by the spring, by what distance $d$ is the spring compressed?", + "choices": null, + "answer": "1.2", + "extraction": "0.1", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 720, + "img_width": 1514, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "3": { + "question_id": "3", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u25b3ABC\u7684\u4e24\u5185\u89d2\u5e73\u5206\u7ebfOB\u3001OC\u76f8\u4ea4\u4e8e\u70b9O\uff0c\u82e5\u2220A\uff1d110\u00b0\uff0c\u5219\u2220BOC\uff1d\uff08\uff09\nChoices:\n(A) 135\u00b0\n(B) 140\u00b0\n(C) 145\u00b0\n(D) 150\u00b0", + "choices": [ + "135\u00b0", + "140\u00b0", + "145\u00b0", + "150\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "135\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 60, + "img_width": 131, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "5": { + "question_id": "5", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m\\angle H$\nChoices:\n(A) 97\n(B) 102\n(C) 107\n(D) 122", + "choices": [ + "97", + "102", + "107", + "122" + ], + "answer": "97", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "97", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 245, + "img_width": 322, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "7": { + "question_id": "7", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "9": { + "question_id": "9", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u662f\u4e00\u682a\u7f8e\u4e3d\u7684\u52fe\u80a1\u6811\uff0c\u5176\u4e2d\u6240\u6709\u56db\u8fb9\u5f62\u90fd\u662f\u6b63\u65b9\u5f62\uff0c\u6240\u6709\u7684\u4e09\u89d2\u5f62\u90fd\u662f\u76f4\u89d2\u4e09\u89d2\u5f62\uff0c\u82e5\u6b63\u65b9\u5f62A\u3001B\u7684\u9762\u79ef\u5206\u522b\u4e3a5\u30013\uff0c\u5219\u6700\u5927\u6b63\u65b9\u5f62C\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 15\n(B) 13\n(C) 11\n(D) 8", + "choices": [ + "15", + "13", + "11", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 155, + "img_width": 134, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "11": { + "question_id": "11", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all tiny matte balls. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "13": { + "question_id": "13", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 90 percent of people in at least one category?", + "choices": null, + "answer": "0", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "15": { + "question_id": "15", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism with be most affected if algae was eliminated?\nChoices:\n(A) Tilapia\n(B) Common water flea\n(C) Great diving beetle\n(D) Tadpole", + "choices": [ + "Tilapia", + "Common water flea", + "Great diving beetle", + "Tadpole" + ], + "answer": "Common water flea", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Tilapia", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 232, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "17": { + "question_id": "17", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0cD\u662fAB\u7684\u4e2d\u70b9\uff0cAB\uff1d10\uff0c\u5219CD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10", + "choices": [ + "5", + "6", + "8", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 172, + "img_width": 125, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "19": { + "question_id": "19", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest amount this class measures?", + "choices": null, + "answer": "400", + "extraction": "400", + "prediction": "400", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 684, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "21": { + "question_id": "21", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "23": { + "question_id": "23", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=2 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "25": { + "question_id": "25", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the smoothest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 770, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "27": { + "question_id": "27", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "11", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1752, + "img_width": 2628, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "29": { + "question_id": "29", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 440, + "img_width": 670, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "31": { + "question_id": "31", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more big red rubber double buss in front of the large red double bus than big green things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "33": { + "question_id": "33", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a sector paper sheet with a central angle of 120.0 and a radius of 6.0 to roll into a conical bottomless paper cap (as shown in the picture), then the bottom perimeter of the paper cap is ()\nChoices:\n(A) 2\u03c0cm\n(B) 3\u03c0cm\n(C) 4\u03c0cm\n(D) 5\u03c0cm", + "choices": [ + "2\u03c0cm", + "3\u03c0cm", + "4\u03c0cm", + "5\u03c0cm" + ], + "answer": "4\u03c0cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2\u03c0cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 331, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "35": { + "question_id": "35", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cEF\uff0cEB\u662f\u2299O\u7684\u5f26\uff0c\u70b9E\u662fFEB\u7684\u4e2d\u70b9\uff0cEF\u4e0eAB\u4ea4\u4e8e\u70b9C\uff0c\u8fde\u63a5OF\uff0c\u82e5\u2220AOF\uff1d40\u00b0\uff0c\u5219\u2220F\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 55\u00b0", + "choices": [ + "20\u00b0", + "35\u00b0", + "40\u00b0", + "55\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "37": { + "question_id": "37", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit as x approaches -1?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 410, + "img_width": 408, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "39": { + "question_id": "39", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function odd or even?\nChoices:\n(A) odd\n(B) even", + "choices": [ + "odd", + "even" + ], + "answer": "odd", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "odd", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 304, + "img_width": 433, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "41": { + "question_id": "41", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 3491, + "img_width": 5236, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "43": { + "question_id": "43", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the wettest on average in Christchurch?\nChoices:\n(A) August\n(B) April\n(C) May", + "choices": [ + "August", + "April", + "May" + ], + "answer": "May", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "August", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 449, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "45": { + "question_id": "45", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An administrator at the Department of Motor Vehicles (DMV) tracked the average wait time from month to month. According to the table, what was the rate of change between August and September? (Unit: minutes per month)", + "choices": null, + "answer": "-3", + "extraction": "-1", + "prediction": "-1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "47": { + "question_id": "47", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all rubber balls. Subtract all yellow shiny things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "49": { + "question_id": "49", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the digits on either end of the sign in the corner?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 476, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "51": { + "question_id": "51", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber objects in front of the small yellow aeroplane greater than the number of big cyan matte fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "53": { + "question_id": "53", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 593, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "55": { + "question_id": "55", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e00\u5757\u76f4\u89d2\u4e09\u89d2\u677f60\u00b0\u7684\u89d2\u7684\u9876\u70b9A\u4e0e\u76f4\u89d2\u9876\u70b9C\u5206\u522b\u5728\u4e24\u5e73\u884c\u7ebfFG\uff0cDE\u4e0a\uff0c\u659c\u8fb9AB\u5e73\u5206\u2220CAG\uff0c\u4ea4\u76f4\u7ebfDE\u4e8e\u70b9H\uff0c\u5219\u2220BCH\u7684\u5927\u5c0f\u4e3a\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 45\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "60\u00b0", + "45\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "30\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 175, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "57": { + "question_id": "57", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small balls. Subtract all blue rubber things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "59": { + "question_id": "59", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, CD is the chord of \u2299O, \u2220ADC = 26.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 26\u00b0\n(B) 74\u00b0\n(C) 64\u00b0\n(D) 54\u00b0", + "choices": [ + "26\u00b0", + "74\u00b0", + "64\u00b0", + "54\u00b0" + ], + "answer": "64\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "61": { + "question_id": "61", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 427, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "63": { + "question_id": "63", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red matte cubes. Subtract all small green metal objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "65": { + "question_id": "65", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is f(3) > 0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "67": { + "question_id": "67", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the square?", + "choices": null, + "answer": "16", + "extraction": "16", + "prediction": "16", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "69": { + "question_id": "69", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big matte balls. Subtract all green rubber objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "71": { + "question_id": "71", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "18", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 292, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "73": { + "question_id": "73", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the matrix.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 654, + "img_width": 387, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "75": { + "question_id": "75", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "77": { + "question_id": "77", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year showed the largest difference in the data points between the two lines", + "choices": null, + "answer": "2019", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "79": { + "question_id": "79", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, C, and D are on circle O, and point E is on the extended line of AD. If \u2220ABC = 60.0, then the degree of \u2220CDE is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 104, + "img_width": 123, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "81": { + "question_id": "81", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of r at theta=3*pi/2?", + "choices": null, + "answer": "-1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 460, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "83": { + "question_id": "83", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of shiny buss less than the number of matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "85": { + "question_id": "85", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many countries have people working for more than 35 hours over the years?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "87": { + "question_id": "87", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $790, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "89": { + "question_id": "89", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 384, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "91": { + "question_id": "91", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown suvs less than the number of brown rubber school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "93": { + "question_id": "93", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What's the computing and wirless total for semiconductor demand in 2014?", + "choices": null, + "answer": "197.3", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "95": { + "question_id": "95", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight lines AB and CD intersect at point O, OD bisects \u2220AOE, \u2220BOC = 50.0, then \u2220EOB = ()\nChoices:\n(A) 50\u00b0\n(B) 60\u00b0\n(C) 70\u00b0\n(D) 80\u00b0", + "choices": [ + "50\u00b0", + "60\u00b0", + "70\u00b0", + "80\u00b0" + ], + "answer": "80\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 162, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "97": { + "question_id": "97", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 9?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "99": { + "question_id": "99", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cat is larger?\nChoices:\n(A) white five\n(B) white three\n(C) white four\n(D) white one\n(E) white two", + "choices": [ + "white five", + "white three", + "white four", + "white one", + "white two" + ], + "answer": "white one", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "white five", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "101": { + "question_id": "101", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which shape is most erect?\nChoices:\n(A) Lanceolate\n(B) Heart-shaped\n(C) Linear\n(D) Spatulate", + "choices": [ + "Lanceolate", + "Heart-shaped", + "Linear", + "Spatulate" + ], + "answer": "Linear", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lanceolate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1204, + "img_width": 376, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "103": { + "question_id": "103", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple matte blocks. Subtract all blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "105": { + "question_id": "105", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 727, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "107": { + "question_id": "107", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "109": { + "question_id": "109", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny balls. Subtract all green metallic things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "111": { + "question_id": "111", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big gray matte things. Subtract all small metallic cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "113": { + "question_id": "113", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many baseballs are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 458, + "img_width": 721, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "115": { + "question_id": "115", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1079, + "img_width": 826, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "117": { + "question_id": "117", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the range of this function?\nChoices:\n(A) [0, 2]\n(B) [3, 2]\n(C) [2, 4]\n(D) [-3, 4]", + "choices": [ + "[0, 2]", + "[3, 2]", + "[2, 4]", + "[-3, 4]" + ], + "answer": "[0, 2]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "[0, 2]", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 460, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "119": { + "question_id": "119", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, P is a point outside \u2299O, PA and PB intersect \u2299O at two points C and D respectively. It is known that the central angles of \u2040AB and \u2040CD are 90.0 and 50.0 respectively, then \u2220P = ()\nChoices:\n(A) 45\u00b0\n(B) 40\u00b0\n(C) 25\u00b0\n(D) 20\u00b0", + "choices": [ + "45\u00b0", + "40\u00b0", + "25\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 103, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "121": { + "question_id": "121", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In trying to calculate how much money could be saved by packing lunch, Manny recorded the amount he spent on lunch each day. According to the table, what was the rate of change between Wednesday and Thursday? (Unit: $, per day)", + "choices": null, + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "123": { + "question_id": "123", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram represents successive rotations, starting from the top down. Which shape comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "D", + "prediction": "D", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 579, + "img_width": 412, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "125": { + "question_id": "125", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if caterpillars decrease?\nChoices:\n(A) plants decrease\n(B) plants increase\n(C) nothing happens\n(D) none of the above", + "choices": [ + "plants decrease", + "plants increase", + "nothing happens", + "none of the above" + ], + "answer": "plants increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plants decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "127": { + "question_id": "127", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "129": { + "question_id": "129", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "131": { + "question_id": "131", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have value below 40?", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "133": { + "question_id": "133", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the merchandise exports greater than 0.92 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1268, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "135": { + "question_id": "135", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of buss that are in front of the big yellow aeroplane less than the number of matte bicycles that are on the right side of the tiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "137": { + "question_id": "137", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) injective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 258, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "139": { + "question_id": "139", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Indigo have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 543, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "141": { + "question_id": "141", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is a long ladder leaning on the wall, the foot of the ladder B is away from the wall 1.6, the point D on the ladder is away from the wall 1.4, the length of BD is 0.55, then the length of the ladder is ()\nChoices:\n(A) 3.85\u7c73\n(B) 4.00\u7c73\n(C) 4.40\u7c73\n(D) 4.50\u7c73", + "choices": [ + "3.85\u7c73", + "4.00\u7c73", + "4.40\u7c73", + "4.50\u7c73" + ], + "answer": "4.40\u7c73", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.85\u7c73", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 78, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "143": { + "question_id": "143", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, CE bisects \u2220BCD and it intersects the AD edge at point E, and DE = 3.0, then the length of AB is ()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 6", + "choices": [ + "1", + "2", + "3", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 204, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "145": { + "question_id": "145", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Can you find the missing term?", + "choices": null, + "answer": "10", + "extraction": "15", + "prediction": "15", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "147": { + "question_id": "147", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample B\n(C) sample A", + "choices": [ + "neither; the samples have the same temperature", + "sample B", + "sample A" + ], + "answer": "sample B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 563, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "149": { + "question_id": "149", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u22201\uff1d50\u00b0\uff0c\u22202\uff1d75\u00b0\uff0c\u5219\u22203\uff1d\uff08\uff09\nChoices:\n(A) 55\u00b0\n(B) 60\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "55\u00b0", + "60\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 93, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "151": { + "question_id": "151", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the function reach its local maximum?\nChoices:\n(A) (u1, u2) = (0, 0)\n(B) (u1, u2) = (1, 0)\n(C) (u1, u2) = (0, 1)\n(D) (u1, u2) = (1, 1)", + "choices": [ + "(u1, u2) = (0, 0)", + "(u1, u2) = (1, 0)", + "(u1, u2) = (0, 1)", + "(u1, u2) = (1, 1)" + ], + "answer": "(u1, u2) = (0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(u1, u2) = (0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 458, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "153": { + "question_id": "153", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be impacted by an increase in owls?\nChoices:\n(A) sun\n(B) grasshoppers\n(C) grass\n(D) mice", + "choices": [ + "sun", + "grasshoppers", + "grass", + "mice" + ], + "answer": "mice", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "sun", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "155": { + "question_id": "155", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Green have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 601, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "157": { + "question_id": "157", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9335", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "159": { + "question_id": "159", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "100", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1000, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "161": { + "question_id": "161", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the two numbers visible in the picture?", + "choices": null, + "answer": "71", + "extraction": "16", + "prediction": "16", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "163": { + "question_id": "163", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "7519", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "165": { + "question_id": "165", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan rubber cylinders. Subtract all tiny shiny cubes. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "167": { + "question_id": "167", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the biggest zero of this function?", + "choices": null, + "answer": "2", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "169": { + "question_id": "169", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1049, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "171": { + "question_id": "171", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many cinnamon rolls are there?", + "choices": null, + "answer": "20", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 190, + "img_width": 467, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "173": { + "question_id": "173", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small rubber buss behind the big green road bike less than the number of suvs that are behind the large brown matte truck?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "175": { + "question_id": "175", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm liver for all the datasets?", + "choices": null, + "answer": "24", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "177": { + "question_id": "177", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of brown tandem bikes that are to the left of the small blue matte car greater than the number of tiny blue biplanes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "179": { + "question_id": "179", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u5df2\u77e5AC\uff1d4cm\uff0c\u82e5\u25b3ACD\u7684\u5468\u957f\u4e3a14cm\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 14cm\n(B) 28cm\n(C) 10cm\n(D) 20cm", + "choices": [ + "14cm", + "28cm", + "10cm", + "20cm" + ], + "answer": "20cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 157, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "181": { + "question_id": "181", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is correct?\nChoices:\n(A) A\n(B) B\n(C) C", + "choices": [ + "A", + "B", + "C" + ], + "answer": "C", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 332, + "img_width": 864, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "183": { + "question_id": "183", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown cubes. Subtract all gray cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "185": { + "question_id": "185", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An image has the gray level PDF $p_r(r)$ shown in Fig. Q1a. One wants to do histogram specification SO that the processed image will have the specified $p_z(z)$ shown in Fig. Q1b. Can we use intensity mapping function $T: z=1-r$ to achieve the goal?\nChoices:\n(A) True\n(B) False", + "choices": [ + "True", + "False" + ], + "answer": "False", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "True", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 376, + "img_width": 724, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "187": { + "question_id": "187", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9015", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "189": { + "question_id": "189", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "191": { + "question_id": "191", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the volume of the air carriers in Ethiopia greater than the average volume of the air carriers in Ethiopia taken over all years ?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1116, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "193": { + "question_id": "193", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red things. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "195": { + "question_id": "195", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u662f\u2299O\u7684\u76f4\u5f84\uff0cC\uff0cD\u4e24\u70b9\u5728\u2299O\u4e0a\uff0c\u2220BCD\uff1d25\u00b0\uff0c\u5219\u2220AOD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 120\u00b0\n(B) 125\u00b0\n(C) 130\u00b0\n(D) 135\u00b0", + "choices": [ + "120\u00b0", + "125\u00b0", + "130\u00b0", + "135\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "120\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 95, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "197": { + "question_id": "197", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many sequences have negative Influence Scores?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 772, + "img_width": 1766, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "199": { + "question_id": "199", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure 23-42 is a section of a conducting rod of radius $R_1=1.30 \\mathrm{~mm}$ and length $L=$ $11.00 \\mathrm{~m}$ inside a thin-walled coaxial conducting cylindrical shell of radius $R_2=10.0 R_1$ and the (same) length $L$. The net charge on the rod is $Q_1=+3.40 \\times 10^{-12} \\mathrm{C}$; that on the shell is $Q_2=-2.00 Q_1$. What is the magnitude $E$ of the electric field at radial distance $r=2.00 R_2$?", + "choices": null, + "answer": "0.21", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 303, + "img_width": 262, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "201": { + "question_id": "201", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the border group?", + "choices": null, + "answer": "19", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "203": { + "question_id": "203", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57285\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u90fd\u662f1\uff0c\u25b3ABC\u7684\u9876\u70b9\u90fd\u5728\u8fd9\u4e9b\u5c0f\u6b63\u65b9\u5f62\u7684\u9876\u70b9\u4e0a\uff0c\u5219tan\u2220BAC\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8", + "choices": [ + "\\frac{4}{3}", + "0.75", + "0.6", + "0.8" + ], + "answer": "\\frac{4}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 151, + "img_width": 172, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "205": { + "question_id": "205", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A statistician analyzed the number of runs scored by players last season. How many players scored more than 2 runs last season?'", + "choices": null, + "answer": "24", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "207": { + "question_id": "207", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms magic and secure?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "209": { + "question_id": "209", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the highest value in black line chart ?", + "choices": null, + "answer": "28.3", + "extraction": "2.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "211": { + "question_id": "211", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 2?", + "choices": null, + "answer": "6", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "213": { + "question_id": "213", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year there was lowest per capita real gross domestic product of ohio?", + "choices": null, + "answer": "2001", + "extraction": "2012", + "prediction": "2012", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "215": { + "question_id": "215", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Layla went on a camping trip and logged the number of miles she hiked each day. What is the range of the numbers?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 249, + "img_width": 212, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "217": { + "question_id": "217", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 202, + "img_width": 304, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "219": { + "question_id": "219", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "221": { + "question_id": "221", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, C are three points on \u2299O, \u2220ACB = 25.0, then the degree of \u2220BAO is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "223": { + "question_id": "223", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an even function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "225": { + "question_id": "225", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Fig. Q4 shows the contour of an object. Represent it with an 8-directional chain code. The resultant chain code should be normalized with respect to the starting point of the chain code. Represent the answer as a list with each digit as a element.", + "choices": null, + "answer": "[0, 2, 0, 2, 1, 7, 1, 2, 0, 3, 0, 6]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 560, + "img_width": 846, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "227": { + "question_id": "227", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 580, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "229": { + "question_id": "229", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest lysine level given?\nChoices:\n(A) 0.33%\n(B) 0.31%\n(C) 0.29%\n(D) 0.32%\n(E) 0.30%", + "choices": [ + "0.33%", + "0.31%", + "0.29%", + "0.32%", + "0.30%" + ], + "answer": "0.30%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.33%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2185, + "img_width": 1683, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "231": { + "question_id": "231", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the overall best ImageNet 10shot Accuracy score across different training steps?\nChoices:\n(A) Identity\n(B) Uniform\n(C) Uniform / Soft\n(D) Soft / Uniform\n(E) Soft\n(F) Dense", + "choices": [ + "Identity", + "Uniform", + "Uniform / Soft", + "Soft / Uniform", + "Soft", + "Dense" + ], + "answer": "Soft", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Identity", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 988, + "img_width": 2002, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "233": { + "question_id": "233", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "235": { + "question_id": "235", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the epigraph of a function f an infinite set?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 266, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "237": { + "question_id": "237", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the Red squirrel and deer mouse population were to decrease, what would happen to the deer tick population?\nChoices:\n(A) increase\n(B) fluctuate\n(C) it would decrease\n(D) stay the same", + "choices": [ + "increase", + "fluctuate", + "it would decrease", + "stay the same " + ], + "answer": "it would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 346, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "239": { + "question_id": "239", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 769, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "241": { + "question_id": "241", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the table have a model size larger than 10B?", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1184, + "img_width": 1570, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "243": { + "question_id": "243", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0686\u0646\u062f \u0639\u062f\u062f \u0634\u06cc\u0631\u06cc\u0646\u06cc \u0645\u062b\u0644\u062b\u06cc \u0634\u06a9\u0644 \u062f\u0631 \u062c\u0639\u0628\u0647 \u0627\u0633\u062a\u061f", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1001, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "245": { + "question_id": "245", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Damon need to buy a grilled steak and a mushroom pizza? (Unit: $)", + "choices": null, + "answer": "24", + "extraction": "22", + "prediction": "22", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 259, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "247": { + "question_id": "247", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: A spaceship of mass $m=4.50 \\times 10^3 \\mathrm{~kg}$ is in a circular Earth orbit of radius $r=8.00 \\times 10^6 \\mathrm{~m}$ and period $T_0=118.6 \\mathrm{~min}=$ $7.119 \\times 10^3 \\mathrm{~s}$ when a thruster is fired in the forward direction to decrease the speed to $96.0 \\%$ of the original speed. What is the period $T$ of the resulting elliptical orbit (Figure)?", + "choices": null, + "answer": "6.36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 906, + "img_width": 914, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "249": { + "question_id": "249", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green rubber cubes. Subtract all red matte blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "251": { + "question_id": "251", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all green balls. Subtract all shiny things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "253": { + "question_id": "253", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "255": { + "question_id": "255", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2220BAC = 110.0, if A and B are symmetrical with respect to the line MP, A and C are symmetrical with respect to the line NQ, then the size of \u2220PAQ is ()\nChoices:\n(A) 70\u00b0\n(B) 55\u00b0\n(C) 40\u00b0\n(D) 30\u00b0", + "choices": [ + "70\u00b0", + "55\u00b0", + "40\u00b0", + "30\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "70\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 188, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "257": { + "question_id": "257", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u4ee5\u76f4\u89d2\u4e09\u89d2\u5f62\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u4e2d\u4e24\u4e2a\u6b63\u65b9\u5f62\u7684\u9762\u79ef\u5982\u56fe\u6240\u793a\uff0c\u5219\u6b63\u65b9\u5f62A\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 6\n(B) 36\n(C) 64\n(D) 8", + "choices": [ + "6", + "36", + "64", + "8" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "259": { + "question_id": "259", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow metal blocks. Subtract all gray metallic cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "261": { + "question_id": "261", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 345, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "263": { + "question_id": "263", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "38", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 113, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "265": { + "question_id": "265", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Justine's P.E. class participated in a push-up competition, and Justine wrote down how many push-ups each person could do. How many people did at least 60 push-ups? (Unit: people)", + "choices": null, + "answer": "11", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 329, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "267": { + "question_id": "267", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape of a leaf is similar to Serrate, but has smaller, evenly-spaced teeth?\nChoices:\n(A) Undulate\n(B) Sinuate\n(C) Serrulate\n(D) Entire", + "choices": [ + "Undulate", + "Sinuate", + "Serrulate", + "Entire" + ], + "answer": "Serrulate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Undulate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 306, + "img_width": 529, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "269": { + "question_id": "269", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the elevation angle of the top of a building is 30.0 when viewed from point A in the air by a hot air balloon, and the depression angle of this building is 60.0. The horizontal distance between the hot air balloon and the building is 120.0. The height of this building is ()\nChoices:\n(A) 160m\n(B) 160\u221a{3}m\n(C) (160-160\u221a{3})m\n(D) 360m", + "choices": [ + "160m", + "160\u221a{3}m", + "(160-160\u221a{3})m", + "360m" + ], + "answer": "160\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "160m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 159, + "img_width": 133, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "271": { + "question_id": "271", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y\nChoices:\n(A) 3\n(B) 4.5\n(C) 5\n(D) 6", + "choices": [ + "3", + "4.5", + "5", + "6" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 448, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "273": { + "question_id": "273", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One diagonal of a rhombus is twice as long as the other diagonal. If the area of the rhombus is 169 square millimeters, what are the lengths of the diagonals?\nChoices:\n(A) 6.5\n(B) 13\n(C) 26\n(D) 52", + "choices": [ + "6.5", + "13", + "26", + "52" + ], + "answer": "26", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "275": { + "question_id": "275", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, AD \u22a5 BC at D, DE \u22a5 AB at E, AD = 3.0, DE = 2.0, then the length of CD is ()\nChoices:\n(A) \\frac{21}{2}\n(B) \\frac{\u221a{15}}{2}\n(C) \\frac{9}{2}\n(D) \\frac{3\u221a{5}}{2}", + "choices": [ + "\\frac{21}{2}", + "\\frac{\u221a{15}}{2}", + "\\frac{9}{2}", + "\\frac{3\u221a{5}}{2}" + ], + "answer": "\\frac{3\u221a{5}}{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{21}{2}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 185, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "277": { + "question_id": "277", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cube is identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 591, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "279": { + "question_id": "279", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be directly affected by a decrease in sunlight?\nChoices:\n(A) grass\n(B) mouse\n(C) grasshopper\n(D) owl", + "choices": [ + "grass", + "mouse", + "grasshopper", + "owl" + ], + "answer": "grass", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grass", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 423, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "281": { + "question_id": "281", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Was this a square pizza?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "283": { + "question_id": "283", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{WTY} \\cong \\overline{TWY}$. Find $x$.\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 10", + "choices": [ + "2", + "4", + "5", + "10" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 416, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "285": { + "question_id": "285", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that AB is the diameter of \u2299O, if the degree of \u2220BOC is 50.0, then the degree of \u2220A is ()\nChoices:\n(A) 50\u00b0\n(B) 40\u00b0\n(C) 30\u00b0\n(D) 25\u00b0", + "choices": [ + "50\u00b0", + "40\u00b0", + "30\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "287": { + "question_id": "287", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region is larger? R1 or R2?\nA. R1\nB. R2\nChoices:\n(A) R1\n(B) R2\n(C) R5\n(D) R3\n(E) R4", + "choices": [ + "R1", + "R2", + "R5", + "R3", + "R4" + ], + "answer": "R2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "R1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 325, + "img_width": 370, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "289": { + "question_id": "289", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 4 dots divided into 2 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 418, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "291": { + "question_id": "291", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which period the number of full time employees is the maximum?\nChoices:\n(A) Jul '21\n(B) Jun '21\n(C) Mar '21\n(D) May '21\n(E) Apr '21", + "choices": [ + "Jul '21", + "Jun '21", + "Mar '21", + "May '21", + "Apr '21" + ], + "answer": "May '21", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Jul '21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "293": { + "question_id": "293", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, grasshopper population increase if\nChoices:\n(A) grouse decrease\n(B) chipmunk increases\n(C) grasses increases\n(D) elk increase", + "choices": [ + "grouse decrease", + "chipmunk increases", + "grasses increases", + "elk increase" + ], + "answer": "grasses increases", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "grouse decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 156, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "295": { + "question_id": "295", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "297": { + "question_id": "297", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green buss greater than the number of blue school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "299": { + "question_id": "299", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1067, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "301": { + "question_id": "301", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model performs the best overall across the three stages in terms of Messenger training performance?\nChoices:\n(A) Dynalang\n(B) EMMA\n(C) R2D2\n(D) IMPALA", + "choices": [ + "Dynalang", + "EMMA", + "R2D2", + "IMPALA" + ], + "answer": "Dynalang", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Dynalang", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 524, + "img_width": 2012, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "303": { + "question_id": "303", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Dim Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 797, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "305": { + "question_id": "305", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "307": { + "question_id": "307", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Figure is an overhead view of the path taken by a race car driver as his car collides with the racetrack wall. Just before the collision, he is traveling at speed $v_i=70 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $30^{\\circ}$ from the wall. Just after the collision, he is traveling at speed $v_f=50 \\mathrm{~m} / \\mathrm{s}$ along a straight line at $10^{\\circ}$ from the wall. His mass $m$ is $80 \\mathrm{~kg}$. The collision lasts for $14 \\mathrm{~ms}$. What is the magnitude of the average force on the driver during the collision?", + "choices": null, + "answer": "2.58", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 466, + "img_width": 772, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "309": { + "question_id": "309", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The movie critic liked to count the number of actors in each movie he saw. How many movies had at least 30 actors but fewer than 47 actors? (Unit: movies)", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "311": { + "question_id": "311", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1947, + "img_width": 1620, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "313": { + "question_id": "313", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 334, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "315": { + "question_id": "315", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, angle A is congruent to angle BED, and angle C is congruent to angle D. If the ratio of the length of AB to the length of EB is 5:1, and the area of the triangle BED is 5*a^2 + 10, what is the area of triangle ABC?\nChoices:\n(A) 5*a^2 + 10\n(B) 25*a^2 + 50\n(C) 25*a^2 + 100\n(D) 125*a^2 + 250\n(E) cannot be determined", + "choices": [ + "5*a^2 + 10", + "25*a^2 + 50", + "25*a^2 + 100", + "125*a^2 + 250", + "cannot be determined" + ], + "answer": "125*a^2 + 250", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5*a^2 + 10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 463, + "img_width": 749, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "317": { + "question_id": "317", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 361, + "img_width": 496, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "319": { + "question_id": "319", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Would most of the ground cover be considered weeds?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "321": { + "question_id": "321", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $330, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "surplus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "323": { + "question_id": "323", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Craig just downloaded the new game Gem Excavator on his phone. In the first level, Craig gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Craig's points. Which color gem affects Craig's points less?'\nChoices:\n(A) green\n(B) red", + "choices": [ + "green", + "red" + ], + "answer": "green", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 94, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "325": { + "question_id": "325", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "327": { + "question_id": "327", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 1 units in at least one store?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "329": { + "question_id": "329", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of y at x=6 is ____ that at x=8\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "larger than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "331": { + "question_id": "331", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Several people compared how many Web pages they had visited. What is the mean of the numbers?'", + "choices": null, + "answer": "64", + "extraction": "65", + "prediction": "65", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 246, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "333": { + "question_id": "333", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find tan X\nChoices:\n(A) \\frac { 5 } { 12 }\n(B) \\frac { 12 } { 13 }\n(C) \\frac { 17 } { 12 }\n(D) \\frac { 12 } { 5 }", + "choices": [ + "\\frac { 5 } { 12 }", + "\\frac { 12 } { 13 }", + "\\frac { 17 } { 12 }", + "\\frac { 12 } { 5 }" + ], + "answer": "\\frac { 5 } { 12 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 5 } { 12 }", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 297, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "335": { + "question_id": "335", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large brown matte balls. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "337": { + "question_id": "337", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to eight.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "339": { + "question_id": "339", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u2299O\u4e2d\uff0cAB=AC\uff0c\u2220BAC\uff1d70\u00b0\uff0c\u5219\u2220AEC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 75\u00b0\n(C) 50\u00b0\n(D) 55\u00b0", + "choices": [ + "65\u00b0", + "75\u00b0", + "50\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 115, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "341": { + "question_id": "341", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is six (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "343": { + "question_id": "343", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small purple metallic spheres. Subtract all small purple things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "7", + "prediction": "7", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "345": { + "question_id": "345", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many kites are there?", + "choices": null, + "answer": "25", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 429, + "img_width": 711, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "347": { + "question_id": "347", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green metallic double buss less than the number of big purple rubber cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "349": { + "question_id": "349", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which capability boasts the highest proportion (%)?\nChoices:\n(A) Rec\n(B) OCR\n(C) Know\n(D) Gen\n(E) Spat\n(F) Math", + "choices": [ + "Rec", + "OCR", + "Know", + "Gen", + "Spat", + "Math" + ], + "answer": "Rec", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rec", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "college", + "img_height": 1348, + "img_width": 1704, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "351": { + "question_id": "351", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer purple rubber objects that are to the left of the red object than tiny matte bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "353": { + "question_id": "353", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: At time $t=0$ a tank contains $Q_0 \\mathrm{lb}$ of salt dissolved in 100 gal of water; see Figure 2.3.1. Assume that water containing $\\frac{1}{4} \\mathrm{lb}$ of salt/gal is entering the tank at a rate of $r \\mathrm{gal} / \\mathrm{min}$ and that the well-stirred mixture is draining from the tank at the same rate. Set up the initial value problem that describes this flow process. By finding the amount of salt $Q(t)$ in the tank at any time, and the limiting amount $Q_L$ that is present after a very long time, if $r=3$ and $Q_0=2 Q_L$, find the time $T$ after which the salt level is within $2 \\%$ of $Q_L$.", + "choices": null, + "answer": "130.4", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 938, + "img_width": 996, + "language": "english", + "skills": [ + "algebraic reasoning", + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "355": { + "question_id": "355", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the parallel lines a and b are intercepted by the straight line c. If \u22201 = 50.0, then the degree of \u22202 is ()\nChoices:\n(A) 150\u00b0\n(B) 130\u00b0\n(C) 110\u00b0\n(D) 100\u00b0", + "choices": [ + "150\u00b0", + "130\u00b0", + "110\u00b0", + "100\u00b0" + ], + "answer": "130\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "150\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 157, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "357": { + "question_id": "357", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "359": { + "question_id": "359", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kylie spent a week at the beach and recorded the number of shells she found each day. According to the table, what was the rate of change between Thursday and Friday? (Unit: shells per day)", + "choices": null, + "answer": "-7", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "361": { + "question_id": "361", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the mold are the cylindrical ports located? \nChoices:\n(A) Upper half\n(B) Lower half\n(C) Medial half\n(D) Lateral half", + "choices": [ + "Upper half", + "Lower half", + "Medial half", + "Lateral half" + ], + "answer": "Lower half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Upper half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 435, + "img_width": 596, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "PMC-VQA", + "split": "testmini", + "task": "visual question answering" + }, + "363": { + "question_id": "363", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny gray metal blocks. Subtract all purple things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "365": { + "question_id": "365", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big yellow metallic spheres. Subtract all tiny metal things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "367": { + "question_id": "367", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "14", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 429, + "img_width": 873, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "369": { + "question_id": "369", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function (f: R to R) surjective?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 266, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "371": { + "question_id": "371", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ABC\uff1d90\u00b0\uff0c\u70b9D\u3001E\u3001F\u5206\u522b\u662f\u8fb9AB\u3001BC\u3001CA\u7684\u4e2d\u70b9\uff0c\u82e5DE+BF\uff1d8\uff0c\u5219BF\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 146, + "img_width": 109, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "373": { + "question_id": "373", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the quadrilateral ABCD, \u2220BAD = 120.0, \u2220B = \u2220D = 90.0, if you find a point M on BC and CD respectively, so that the perimeter of \u25b3AMN is the smallest, then the degree of \u2220AMN + \u2220ANM is ()\nChoices:\n(A) 110\u00b0\n(B) 120\u00b0\n(C) 140\u00b0\n(D) 150\u00b0", + "choices": [ + "110\u00b0", + "120\u00b0", + "140\u00b0", + "150\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "110\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 122, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "375": { + "question_id": "375", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the length of $AC$ in the isosceles triangle ABC. \nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5", + "choices": [ + "1.5", + "7", + "11", + "12.5" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 293, + "img_width": 703, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "377": { + "question_id": "377", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 649, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "379": { + "question_id": "379", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown what will most directly be affected by the loss of the trees?\nChoices:\n(A) horses\n(B) cats\n(C) nothing\n(D) bears", + "choices": [ + "horses", + "cats", + "nothing", + "bears" + ], + "answer": "horses", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "horses", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 400, + "img_width": 570, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "381": { + "question_id": "381", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny cyan matte articulated buss left of the big school bus than small yellow matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "383": { + "question_id": "383", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What value you get , if you divide the largest bar value by 2 ?", + "choices": null, + "answer": "131253.5", + "extraction": "12.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "385": { + "question_id": "385", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cyan have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 771, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "387": { + "question_id": "387", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Of the four balls in the photo, what is the percentage of them on the ground?", + "choices": null, + "answer": "100", + "extraction": "75", + "prediction": "75", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 485, + "img_width": 363, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "389": { + "question_id": "389", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $320, is there a shortage or a surplus?'\nChoices:\n(A) shortage\n(B) surplus", + "choices": [ + "shortage", + "surplus" + ], + "answer": "shortage", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "shortage", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 353, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "391": { + "question_id": "391", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, point O is the center of \u2299O, points A, B, and C are on \u2299O, AO \u2225 BC, \u2220AOB = 40.0, then the degree of \u2220OAC is equal to ()\nChoices:\n(A) 40\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "60\u00b0", + "50\u00b0", + "20\u00b0" + ], + "answer": "20\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 96, + "img_width": 96, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "393": { + "question_id": "393", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest dark blue bar?", + "choices": null, + "answer": "54", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "395": { + "question_id": "395", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average age of the people in this picture?", + "choices": null, + "answer": "10", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "397": { + "question_id": "397", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001B\u3001C\u90fd\u5728\u534a\u5f84\u4e3a2\u7684\u2299O\u4e0a\uff0c\u2220C\uff1d30\u00b0\uff0c\u5219\u5f26AB\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.2\n(D) 2.5", + "choices": [ + "1", + "2", + "2.2", + "2.5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 70, + "img_width": 73, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "399": { + "question_id": "399", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "401": { + "question_id": "401", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "403": { + "question_id": "403", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find TX if $E X=24$ and $D E=7$\nChoices:\n(A) 7\n(B) 24\n(C) 25\n(D) 32", + "choices": [ + "7", + "24", + "25", + "32" + ], + "answer": "32", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 221, + "img_width": 564, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "405": { + "question_id": "405", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "19", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1351, + "img_width": 1801, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "407": { + "question_id": "407", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9B\uff0cD\uff0cE\uff0cC\u5728\u540c\u4e00\u6761\u76f4\u7ebf\u4e0a\uff0c\u82e5\u25b3ABD\u224c\u25b3ACE\uff0c\u2220AEC\uff1d110\u00b0\uff0c\u5219\u2220DAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 67, + "img_width": 76, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "409": { + "question_id": "409", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the radius of this circle?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 356, + "img_width": 358, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "411": { + "question_id": "411", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average percentage of population having access to electricity per year?", + "choices": null, + "answer": "100", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1081, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "413": { + "question_id": "413", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5df2\u77e5\uff1a\u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAB\uff1dAC\uff0cBD\u4e3a\u2220ABC\u7684\u5e73\u5206\u7ebf\uff0c\u2220BDC\uff1d75\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 123, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "415": { + "question_id": "415", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average annual wage in Slovak Republic in the year 2019", + "choices": null, + "answer": "15017", + "extraction": "10000", + "prediction": "10000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "417": { + "question_id": "417", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 748, + "img_width": 564, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "419": { + "question_id": "419", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "421": { + "question_id": "421", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An elevator cab of mass $m=500 \\mathrm{~kg}$ is descending with speed $v_i=4.0 \\mathrm{~m} / \\mathrm{s}$ when its supporting cable begins to slip, allowing it to fall with constant acceleration $\\vec{a}=\\vec{g} / 5$.\r\nDuring the $12 \\mathrm{~m}$ fall, what is the work $W_T$ done on the cab by the upward pull $\\vec{T}$ of the elevator cable?", + "choices": null, + "answer": "-47", + "extraction": "1200", + "prediction": "1200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1190, + "img_width": 550, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "423": { + "question_id": "423", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink less than Dark Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 577, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "425": { + "question_id": "425", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5728Rt\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5AC\uff1d6\uff0cBC\uff1d8\uff0c\u5219cosA\u7684\u503c\u4e3a\uff08\uff09\nChoices:\n(A) 0.6\n(B) 0.8\n(C) 0.75\n(D) \\frac{4}{3}", + "choices": [ + "0.6", + "0.8", + "0.75", + "\\frac{4}{3}" + ], + "answer": "0.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 171, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "427": { + "question_id": "427", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "429": { + "question_id": "429", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "431": { + "question_id": "431", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what would happen to dragonfly if all mayfly dies\nChoices:\n(A) remains the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remains the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 297, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "433": { + "question_id": "433", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 350, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "435": { + "question_id": "435", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of employed females who are not attending school greater than the average percentage of employed females who are not attending school taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 955, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "437": { + "question_id": "437", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fig.Q3 shows an excerpt of the transmission phase of a TCP connection. Assume the length of the IP header is 20 bytes. What is the ACK number at message 6?", + "choices": null, + "answer": "839", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 814, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "439": { + "question_id": "439", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: is this function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 256, + "img_width": 539, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "441": { + "question_id": "441", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "443": { + "question_id": "443", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure: In Rt\u25b3ABC, \u2220C = 90.0, AC = 8.0, AB = 10.0, then the value of sinB is equal to ()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}", + "choices": [ + "\\frac{3}{5}", + "\\frac{4}{5}", + "\\frac{3}{4}", + "\\frac{4}{3}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{3}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 80, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "445": { + "question_id": "445", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate less than Saddle Brown?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 436, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "447": { + "question_id": "447", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Midnight Blue intersect Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 685, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "449": { + "question_id": "449", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do the average motorcycle get on the highway?", + "choices": null, + "answer": "40", + "extraction": "50", + "prediction": "50", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "451": { + "question_id": "451", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow metallic choppers that are behind the large cyan thing less than the number of brown metal double buss that are behind the small yellow shiny thing?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "453": { + "question_id": "453", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 116, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "455": { + "question_id": "455", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If x = 32 and r = 18, what is the length of the arc shown in the figure above?\nChoices:\n(A) 16*\\pi/5\n(B) 32*\\pi/5\n(C) 36*\\pi\n(D) 288*\\pi/5\n(E) 576*\\pi", + "choices": [ + "16*\\pi/5", + "32*\\pi/5", + "36*\\pi", + "288*\\pi/5", + "576*\\pi" + ], + "answer": "16*\\pi/5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16*\\pi/5", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 575, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "457": { + "question_id": "457", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "4525", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 605, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "459": { + "question_id": "459", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large cyan matte balls. Subtract all tiny shiny objects. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "461": { + "question_id": "461", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A perceptual audio codec is used to compress an audio signal. The codec groups every 4 barks into a subband and then allocates bits to different subbands according to the result of a spectrum analysis based on a psychoacoustic model. All samples in the same subband are quantized with the same quantizer, and the bit resolution of which is allocated by the codec. (The Bark scale is a psychoacoustical scale proposed by Eberhard Zwicker in 1961.) Fig. Q1a shows the frequency spectrum of a windowed segment of audio signal. The psychoacoustic model shown in Fig. Q1b is used in the audio codec to derive the masking threshold for the audio segment. How many potential maskers in Fig. Q1a?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 488, + "img_width": 908, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "463": { + "question_id": "463", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray things. Subtract all small brown metallic balls. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "465": { + "question_id": "465", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 628, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "467": { + "question_id": "467", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The degree measures of minor arc $\\widehat{A C}$ and major arc $\\widehat{A D C}$ are $x$ and $y$ respectively. If $m\u2220ABC = 70\u00b0$, find $x$.\nChoices:\n(A) 90\n(B) 100\n(C) 110\n(D) 120", + "choices": [ + "90", + "100", + "110", + "120" + ], + "answer": "110", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "90", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 235, + "img_width": 499, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "469": { + "question_id": "469", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Sky Blue less than Chartreuse?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "471": { + "question_id": "471", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Lily and her friends recorded their scores while playing a board game. Which score did the greatest number of people receive?'", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 190, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "473": { + "question_id": "473", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2604, + "img_width": 2500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "475": { + "question_id": "475", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 71, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "477": { + "question_id": "477", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "half", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "479": { + "question_id": "479", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How many times Norway data bigger than Italy data ?", + "choices": null, + "answer": "2.54", + "extraction": "1.75", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "481": { + "question_id": "481", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 404, + "img_width": 592, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "483": { + "question_id": "483", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is on \u2299O, AE is the tangent of \u2299O, A is the tangent point, connect BC and extend to intersect AE at point D. If \u2220AOC = 80.0, then the degree of \u2220ADB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 20\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "20\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 165, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "485": { + "question_id": "485", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9D\u5728\u7b49\u8fb9\u25b3ABC\u7684\u8fb9CB\u7684\u5ef6\u957f\u7ebf\u4e0a\uff0c\u70b9E\u5728\u7ebf\u6bb5BC\u4e0a\uff0c\u8fde\u63a5AD\uff0cAE\uff0c\u82e5DA\uff1dDE\uff0c\u4e14\u2220DAB\uff1d20\u00b0\uff0c\u90a3\u4e48\u2220EAC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 20\u00b0\n(B) 15\u00b0\n(C) 10\u00b0\n(D) 5\u00b0", + "choices": [ + "20\u00b0", + "15\u00b0", + "10\u00b0", + "5\u00b0" + ], + "answer": "10\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 235, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "487": { + "question_id": "487", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big cars behind the small brown shiny mountain bike than tiny objects on the right side of the bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "489": { + "question_id": "489", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For trapezoid ABCD shown above, AB = 24, AD = 23, and BC = 16. What is the length of segment CD?", + "choices": null, + "answer": "25", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 297, + "img_width": 426, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "491": { + "question_id": "491", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 540, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "493": { + "question_id": "493", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function differentiable at every point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 847, + "img_width": 800, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "495": { + "question_id": "495", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer green things in front of the blue metallic car than choppers right of the chopper?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "497": { + "question_id": "497", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "499": { + "question_id": "499", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Quadrilateral $ABDC$ is a rectangle. If $m\\angle1 = 38$, find $m \\angle 2$\nChoices:\n(A) 33\n(B) 38\n(C) 52\n(D) 87", + "choices": [ + "33", + "38", + "52", + "87" + ], + "answer": "52", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 323, + "img_width": 559, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "501": { + "question_id": "501", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red rubber cylinders. Subtract all blue objects. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "503": { + "question_id": "503", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the center person? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 225, + "img_width": 338, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "505": { + "question_id": "505", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the circle O with a radius of 5.0, the length of the chord AB is 8.0, then the distance from the center O to the chord AB is ()\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 100, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "507": { + "question_id": "507", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen if the hawk population increased?\nChoices:\n(A) mice would increase\n(B) sparrows increased\n(C) garter snakes would decrease\n(D) grass decreased", + "choices": [ + "mice would increase", + "sparrows increased", + "garter snakes would decrease", + "grass decreased" + ], + "answer": "garter snakes would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mice would increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "509": { + "question_id": "509", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 400, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "511": { + "question_id": "511", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "513": { + "question_id": "513", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the highest value in states that border West Virginia ?\nChoices:\n(A) 43.2%-63.6%\n(B) 45.2%-65.6%\n(C) 42.2%-62.6%\n(D) 41.2%-61.6%\n(E) 44.2%-64.6%", + "choices": [ + "43.2%-63.6%", + "45.2%-65.6%", + "42.2%-62.6%", + "41.2%-61.6%", + "44.2%-64.6%" + ], + "answer": "42.2%-62.6%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "43.2%-63.6%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "515": { + "question_id": "515", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You would potentially see a decrease in which organism if gulls disappeared?\nChoices:\n(A) herring\n(B) kril\n(C) anchovy\n(D) phytoplankton", + "choices": [ + "herring", + "kril", + "anchovy", + "phytoplankton" + ], + "answer": "kril", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "herring", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 549, + "img_width": 398, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "517": { + "question_id": "517", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: At Bloomington Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2014 and 2015? (Unit: employees per year)", + "choices": null, + "answer": "-1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 275, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "519": { + "question_id": "519", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many Triangles do you see in the picture?", + "choices": null, + "answer": "12", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 852, + "img_width": 948, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "521": { + "question_id": "521", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point C is a point on \u2299O, \u2220C = 20.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 20\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "20\u00b0", + "30\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 120, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "523": { + "question_id": "523", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, a teaching interest group wants to measure the height of a tree CD. They firstly measured the elevation angle of the tree top C at point A as 30.0, and then proceeded 10.0 along the direction of AD to point B, and the elevation angle of tree top C measured at B is 60.0 (the three points A, B, and D are on the same straight line), then the height of the tree CD is ()\nChoices:\n(A) 10m\n(B) 5m\n(C) 5\u221a{3}m\n(D) 10\u221a{3}m", + "choices": [ + "10m", + "5m", + "5\u221a{3}m", + "10\u221a{3}m" + ], + "answer": "5\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 285, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "525": { + "question_id": "525", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value shown on the X axis of first plot?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2209, + "img_width": 1711, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "527": { + "question_id": "527", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big shiny cars in front of the red airliner greater than the number of big purple road bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "529": { + "question_id": "529", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what number does the smaller arrow point to?", + "choices": null, + "answer": "1020", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "531": { + "question_id": "531", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) to five.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "533": { + "question_id": "533", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small cyan cubes. Subtract all large yellow rubber cubes. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "535": { + "question_id": "535", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "-8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "537": { + "question_id": "537", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of red rubber bicycles less than the number of cyan metal school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "539": { + "question_id": "539", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u3001E\u5206\u522b\u662f\u8fb9AB\u3001BC\u7684\u4e2d\u70b9\uff0c\u82e5\u25b3BDE\u7684\u5468\u957f\u662f6\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u662f\uff08\uff09\nChoices:\n(A) 8\n(B) 10\n(C) 12\n(D) 14", + "choices": [ + "8", + "10", + "12", + "14" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 71, + "img_width": 149, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "541": { + "question_id": "541", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is not identical to the unfolded net?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 560, + "img_width": 280, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "543": { + "question_id": "543", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small purple matte cars than brown matte things?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "545": { + "question_id": "545", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red less than Crimson?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 764, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "547": { + "question_id": "547", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram below, which organisms will be most directly affected by a decrease in the amount of grass?\nChoices:\n(A) Insects\n(B) Hawk and snake\n(C) Snake and raccoon\n(D) Mouse and cricket", + "choices": [ + "Insects", + "Hawk and snake", + "Snake and raccoon", + "Mouse and cricket" + ], + "answer": "Insects", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Insects", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 377, + "img_width": 630, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "549": { + "question_id": "549", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangent to \u2299O to A and B respectively. Point C and point D are the moving points on line segments PA and PB, and CD always remains tangent to circle O. If PA = 8.0, then perimeter of \u25b3PCD is ()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) \u4e0d\u80fd\u786e\u5b9a", + "choices": [ + "8", + "12", + "16", + "\u4e0d\u80fd\u786e\u5b9a" + ], + "answer": "16", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 192, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "551": { + "question_id": "551", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest tattoos in male and the least in female?", + "choices": null, + "answer": "14", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "553": { + "question_id": "553", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet less than Chocolate?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "555": { + "question_id": "555", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this nest larger than a fist?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "557": { + "question_id": "557", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220BAC\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5206\u522b\u5411\u5916\u4f5c\u7b49\u8fb9\u4e09\u89d2\u5f62\u25b3A'BC\uff0c\u25b3AB'C\uff0c\u25b3ABC'\uff0c\u82e5\u25b3A'BC\uff0c\u25b3AB'C\u7684\u9762\u79ef\u5206\u522b\u662f10\u548c4\uff0c\u5219\u25b3ABC'\u7684\u9762\u79ef\u662f\uff08\uff09\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 9", + "choices": [ + "4", + "6", + "8", + "9" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 130, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "559": { + "question_id": "559", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest number shown on the black outer part of the watch?", + "choices": null, + "answer": "55", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "561": { + "question_id": "561", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray rubber double buss right of the small red aeroplane the same as the number of small objects that are left of the tiny gray matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "563": { + "question_id": "563", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which number on the monitor is higher?\nChoices:\n(A) top\n(B) bottom\n(C) left\n(D) right", + "choices": [ + "top", + "bottom", + "left", + "right" + ], + "answer": "bottom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "565": { + "question_id": "565", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model can achieve the best ImageNet 10-shot Accuracy score?\nChoices:\n(A) Soft MoE\n(B) Experts Choice\n(C) Tokens Choice\n(D) Dense", + "choices": [ + "Soft MoE", + "Experts Choice", + "Tokens Choice", + "Dense" + ], + "answer": "Soft MoE", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Soft MoE", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 978, + "img_width": 1966, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "567": { + "question_id": "567", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the slug to the nearest inch. The slug is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 252, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "569": { + "question_id": "569", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which subject had the highest pulse rate in baseline period?", + "choices": null, + "answer": "1", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2284, + "img_width": 1786, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "571": { + "question_id": "571", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 613, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "573": { + "question_id": "573", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A race car driver kept track of how many laps he drove in the past 5 days. What is the mode of the numbers?'", + "choices": null, + "answer": "53", + "extraction": "55", + "prediction": "55", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 203, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "575": { + "question_id": "575", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines $l$, $m$, and $n$ are perpendicular bisectors of $\\triangle PQR$ and meet at $T$. If $TQ = 2x$, $PT = 3y - 1$, and $TR = 8$, find $z$.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 287, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "577": { + "question_id": "577", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Consider the following matrices:\r\n$$\r\n\\mathbf{A}=\\left(\\begin{array}{rrr}\r\n1 & 2 & -1 \\\\\r\n0 & 3 & 1 \\\\\r\n2 & 0 & 1\r\n\\end{array}\\right), \\quad \\mathbf{B}=\\left(\\begin{array}{rrr}\r\n2 & 1 & 0 \\\\\r\n0 & -1 & 2 \\\\\r\n1 & 1 & 3\r\n\\end{array}\\right), \\quad \\mathbf{C}=\\left(\\begin{array}{ll}\r\n2 & 1 \\\\\r\n4 & 3 \\\\\r\n1 & 0\r\n\\end{array}\\right)\r\n$$\r\nFind $|\\mathbf{A B}|$.", + "choices": null, + "answer": "-104", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 142, + "img_width": 533, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "579": { + "question_id": "579", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of documents required per shipment to export goods in Uganda per year?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1228, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "581": { + "question_id": "581", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large matte cubes. Subtract all matte blocks. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "583": { + "question_id": "583", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth.\r\n\nChoices:\n(A) 5.8\n(B) 6.5\n(C) 14.2\n(D) 44.3", + "choices": [ + "5.8", + "6.5", + "14.2", + "44.3" + ], + "answer": "5.8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.8", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 465, + "img_width": 319, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "585": { + "question_id": "585", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u77e9\u5f62ABCD\u4e2d\uff0cAB\uff1d2\uff0c\u2220AOB\uff1d60\u00b0\uff0c\u5219BD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "4", + "3", + "2", + "2\u221a{3}" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "587": { + "question_id": "587", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At 9.0 in the morning, a ship departs from point A and sails in the direction due east at a speed of 40.0 nautical miles per hour, and arrives at point B at 9.0 and 30.0 minutes. As shown in the figure, the island M is measured from A and B. In the direction of 45.0 north by east and 15.0 north by east, then the distance between B and island M is ()\nChoices:\n(A) 20\u6d77\u91cc\n(B) 20\u221a{2}\u6d77\u91cc\n(C) 15\u6d77\u91cc\n(D) 20\u6d77\u91cc", + "choices": [ + "20\u6d77\u91cc", + "20\u221a{2}\u6d77\u91cc", + "15\u6d77\u91cc", + "20\u6d77\u91cc" + ], + "answer": "20\u221a{2}\u6d77\u91cc", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20\u6d77\u91cc", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 124, + "img_width": 144, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "589": { + "question_id": "589", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number of things are either large objects behind the shiny double bus or tiny gray metal objects?", + "choices": null, + "answer": "5", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "591": { + "question_id": "591", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 600, + "img_width": 900, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "593": { + "question_id": "593", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average of longest light blue bar and shortest gray bar?", + "choices": null, + "answer": "273", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "595": { + "question_id": "595", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "597": { + "question_id": "597", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the least preferred object?", + "choices": null, + "answer": "10", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "599": { + "question_id": "599", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AC = 6 and BC = 3. Point P lies on line AB between A and B such that line CP is perpendicular to line AB. Which of the following could be the length of line CP?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 7\n(E) 8", + "choices": [ + "2", + "4", + "5", + "7", + "8" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 340, + "img_width": 393, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "601": { + "question_id": "601", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of smallest segment and second largest segment?", + "choices": null, + "answer": "0.33", + "extraction": "0.17", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 386, + "img_width": 210, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "603": { + "question_id": "603", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP C\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "300", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "605": { + "question_id": "605", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large green matte cubes. Subtract all big green blocks. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "607": { + "question_id": "607", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow shiny things. Subtract all yellow metal things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "609": { + "question_id": "609", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green matte cylinders. Subtract all big brown cubes. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "611": { + "question_id": "611", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A shipping company keeps track of the number of boxes in each shipment they send out. How many shipments had exactly 56 boxes? (Unit: shipments)", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 180, + "img_width": 153, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "613": { + "question_id": "613", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many houses are there?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 87, + "img_width": 473, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "615": { + "question_id": "615", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If two sides of a triangle measure 12 and 7, which of the following cannot be the perimeter of the triangle?\nChoices:\n(A) 29\n(B) 34\n(C) 37\n(D) 38", + "choices": [ + "29", + "34", + "37", + "38" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "29", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 195, + "img_width": 522, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "617": { + "question_id": "617", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the normal components of $\\mathbf{a}$.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "619": { + "question_id": "619", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(4)?", + "choices": null, + "answer": "16", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 666, + "img_width": 970, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "621": { + "question_id": "621", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The figure above is composed of 25 small triangles that are congruent and equilateral. If the area of triangle DFH is 10, what is the area of triangle AFK?\nChoices:\n(A) 40\n(B) 42.5\n(C) 50\n(D) 52.5\n(E) 62.5", + "choices": [ + "40", + "42.5", + "50", + "52.5", + "62.5" + ], + "answer": "62.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 315, + "img_width": 397, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "623": { + "question_id": "623", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is twelve (_).\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "o'clock", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "625": { + "question_id": "625", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of blue matte school buss greater than the number of large cyan metallic jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "627": { + "question_id": "627", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends played a trivia game and recorded their scores. What is the mode of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 311, + "img_width": 155, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "629": { + "question_id": "629", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people prefer the object hut?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "631": { + "question_id": "631", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "0", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "633": { + "question_id": "633", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m\u22201 = 123$. Find the measure of $\\angle 14$.\nChoices:\n(A) 47\n(B) 57\n(C) 67\n(D) 123", + "choices": [ + "47", + "57", + "67", + "123" + ], + "answer": "57", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "47", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 330, + "img_width": 361, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "635": { + "question_id": "635", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, E is any point in \u25b1ABCD, if S~quadrilateral ABCD~ = 6.0, then the area of \u200b\u200bthe shaded part in the figure is ()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 179, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "637": { + "question_id": "637", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfa\u2225b\uff0c\u76f4\u7ebfa\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u5206\u522b\u4ea4\u4e8e\u70b9E\uff0cF\uff0c\u76f4\u7ebfb\u4e0e\u77e9\u5f62ABCD\u7684\u8fb9CB\uff0cCD\u5206\u522b\u4ea4\u4e8e\u70b9G\uff0cH\uff0e\u82e5\u2220AFE\uff1d30\u00b0\uff0c\u5219\u2220DHG\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 100\u00b0\n(B) 110\u00b0\n(C) 120\u00b0\n(D) 130\u00b0", + "choices": [ + "100\u00b0", + "110\u00b0", + "120\u00b0", + "130\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "100\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 108, + "img_width": 166, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "639": { + "question_id": "639", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What does the dial indicate as the top facing number?", + "choices": null, + "answer": "475", + "extraction": "500", + "prediction": "500", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VizWiz", + "split": "testmini", + "task": "visual question answering" + }, + "641": { + "question_id": "641", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: The graph of the concentration function $c(t)$ is shown after a 7-mg injection of dye into a heart. Use Simpson's Rule to estimate the cardiac output.", + "choices": null, + "answer": "5.77", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 420, + "img_width": 828, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "643": { + "question_id": "643", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is the diameter of \u2299O, chord DE \u2225 OA, if the degree of \u2220D is 50.0, then the degree of \u2220C is ()\nChoices:\n(A) 25\u00b0\n(B) 30\u00b0\n(C) 40\u00b0\n(D) 50\u00b0", + "choices": [ + "25\u00b0", + "30\u00b0", + "40\u00b0", + "50\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 125, + "img_width": 111, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "645": { + "question_id": "645", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAC\uff0cBD\u662f\u83f1\u5f62ABCD\u7684\u5bf9\u89d2\u7ebf\uff0cBH\u22a5AD\u4e8e\u70b9H\uff0c\u82e5AC\uff1d4\uff0cBD\uff1d3\uff0c\u5219BH\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 2.4\n(B) 2.5\n(C) 4.8\n(D) 5", + "choices": [ + "2.4", + "2.5", + "4.8", + "5" + ], + "answer": "2.4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 139, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "647": { + "question_id": "647", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the top view.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "B", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 900, + "img_width": 600, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "649": { + "question_id": "649", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many values are below 30 in Mainly are incidents of individual misconduct?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 461, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "651": { + "question_id": "651", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an assignment, Johnny looked at which countries got the most Nobel Prizes in various decades. In the 1990s, how many more Nobel Prize winners did Canada have than Italy? (Unit: Nobel Prize winners)", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 224, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "653": { + "question_id": "653", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there at least three distinct shades of blue in this photo?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 425, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "655": { + "question_id": "655", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value of Russia has the highest transport?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 507, + "img_width": 858, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "657": { + "question_id": "657", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Arkansas have a higher value than Indiana ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "659": { + "question_id": "659", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value of navy blue bar?", + "choices": null, + "answer": "991", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "661": { + "question_id": "661", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1274, + "img_width": 1732, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "663": { + "question_id": "663", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past six.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 203, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "665": { + "question_id": "665", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $h$ in the triangle.\nChoices:\n(A) 4.62\n(B) 5.66\n(C) 6.93\n(D) 8", + "choices": [ + "4.62", + "5.66", + "6.93", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.62", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 161, + "img_width": 275, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "667": { + "question_id": "667", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has the least difference between the used and new cars?", + "choices": null, + "answer": "2015", + "extraction": "2014", + "prediction": "2014", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "669": { + "question_id": "669", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, line segment AB = 10.0, M is the midpoint of line segment AB, C is the midpoint of line segment MB, N is a point of line segment AM, and MN = 1.0, the length of line segment NC ()\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 3.5", + "choices": [ + "2", + "2.5", + "3", + "3.5" + ], + "answer": "3.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 18, + "img_width": 187, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "671": { + "question_id": "671", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the semicircle rounded to 2 decimal places?", + "choices": null, + "answer": "14.14", + "extraction": "3.14", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "673": { + "question_id": "673", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large green cars less than the number of brown rubber double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "675": { + "question_id": "675", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the cross section of a small reservoir dam is a right trapezoid, the width of crest BC is 6.0, the height of dam is 14.0, and the slope of the slope CD is i = 1.0:2.0, then the length of the dam bottom AD is ()\nChoices:\n(A) 13m\n(B) 34m\n(C) (6+14\u221a{3})m\n(D) 40m", + "choices": [ + "13m", + "34m", + "(6+14\u221a{3})m", + "40m" + ], + "answer": "34m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "13m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 83, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "677": { + "question_id": "677", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of dirtbikes right of the large blue object less than the number of small green metallic cars in front of the tiny matte bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "679": { + "question_id": "679", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, the diagonal AC and BD intersect at point O, if AC = 12.0, BD = 8.0, AB = 7.0, then the perimeter of \u25b3OAB is ()\nChoices:\n(A) 15\n(B) 17\n(C) 21\n(D) 27", + "choices": [ + "15", + "17", + "21", + "27" + ], + "answer": "17", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 73, + "img_width": 173, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "681": { + "question_id": "681", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the largest city in the nation where this plane is headquartered?\nChoices:\n(A) hong kong\n(B) osaka\n(C) shanghai\n(D) tokyo", + "choices": [ + "hong kong", + "osaka", + "shanghai", + "tokyo" + ], + "answer": "tokyo", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "hong kong", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "683": { + "question_id": "683", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 157, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "685": { + "question_id": "685", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to organism c if organism b increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't predict\n(D) stay same", + "choices": [ + "decrease", + "increase", + "can't predict", + "stay same" + ], + "answer": "increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 246, + "img_width": 574, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "687": { + "question_id": "687", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen that would increase the number of krill?\nChoices:\n(A) increase in phytoplankton\n(B) decrease in penguins\n(C) increase in fish\n(D) increase in birds", + "choices": [ + "increase in phytoplankton", + "decrease in penguins", + "increase in fish", + "increase in birds" + ], + "answer": "increase in phytoplankton", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "increase in phytoplankton", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 396, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "689": { + "question_id": "689", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are these people sitting in a circle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "691": { + "question_id": "691", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "256", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 500, + "img_width": 596, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "693": { + "question_id": "693", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the orange larger than the car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "695": { + "question_id": "695", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon greater than Dark Orchid?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 734, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "697": { + "question_id": "697", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the parallelogram ABCD, it is known that AB = 6.0, BC = 9.0, \u2220B = 30.0, then the area of \u200b\u200bthe parallelogram ABCD is ()\nChoices:\n(A) 12\n(B) 18\n(C) 27\n(D) 54", + "choices": [ + "12", + "18", + "27", + "54" + ], + "answer": "27", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 205, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "699": { + "question_id": "699", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the center and the rightmost person? (Unit: years)", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2684, + "img_width": 4577, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "701": { + "question_id": "701", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 109, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "703": { + "question_id": "703", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of highest value and lowest value of navy blue bar?", + "choices": null, + "answer": "2372.1", + "extraction": "1.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "705": { + "question_id": "705", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the heart wider than more than half the width of the thorax?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 512, + "img_width": 419, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "707": { + "question_id": "707", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0ca\u2225b\uff0c\u22201\uff1d60\u00b0\uff0c\u5219\u22202\u7684\u5927\u5c0f\u662f\uff08\uff09\nChoices:\n(A) 60\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 120\u00b0", + "choices": [ + "60\u00b0", + "80\u00b0", + "100\u00b0", + "120\u00b0" + ], + "answer": "120\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 120, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "709": { + "question_id": "709", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "711": { + "question_id": "711", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 270, + "img_width": 369, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "713": { + "question_id": "713", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 7", + "choices": [ + "3", + "4", + "6", + "7" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 422, + "img_width": 521, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "715": { + "question_id": "715", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "717": { + "question_id": "717", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is \\int_1^{\\infty} {1\\over x^{0.99}} dx finite according to this graph ?\n\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 314, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "719": { + "question_id": "719", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Brenda graphed the daily low temperature for 5 days. What is the range of the numbers?'", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "721": { + "question_id": "721", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many odd functions are in the graph?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 297, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "723": { + "question_id": "723", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function convex?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 277, + "img_width": 468, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "725": { + "question_id": "725", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In Figure, suppose that Barbara's velocity relative to Alex is a constant $v_{B A}=52 \\mathrm{~km} / \\mathrm{h}$ and car $P$ is moving in the negative direction of the $x$ axis.\r\n(a) If Alex measures a constant $v_{P A}=-78 \\mathrm{~km} / \\mathrm{h}$ for car $P$, what velocity $v_{P B}$ will Barbara measure?", + "choices": null, + "answer": "-130", + "extraction": "-26", + "prediction": "-26", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 690, + "img_width": 976, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "727": { + "question_id": "727", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the largest and the smallest value in the chart?", + "choices": null, + "answer": "70", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "729": { + "question_id": "729", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "731": { + "question_id": "731", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The train conductor made sure to count the number of passengers on each train. What is the smallest number of passengers? (Unit: passengers)", + "choices": null, + "answer": "40", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 159, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "733": { + "question_id": "733", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Square ABCD. CT: tangent to semicircle. Find the angle \u2220CTD. Return the numeric value.", + "choices": null, + "answer": "63.4", + "extraction": "45.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1018, + "img_width": 972, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "735": { + "question_id": "735", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan things in front of the cyan rubber suv less than the number of big suvs that are behind the red bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "737": { + "question_id": "737", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 32\n(B) 39\n(C) 46\n(D) 78", + "choices": [ + "32", + "39", + "46", + "78" + ], + "answer": "78", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 179, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "739": { + "question_id": "739", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hannah need to buy a baking dish and a cookie jar? (Unit: $)", + "choices": null, + "answer": "23", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 201, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "741": { + "question_id": "741", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1080, + "img_width": 1920, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "743": { + "question_id": "743", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the different between the highest unemployment rate and the lowest?", + "choices": null, + "answer": "10.53", + "extraction": "1.7", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "745": { + "question_id": "745", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2832, + "img_width": 4256, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "747": { + "question_id": "747", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot M$, $FL=24,HJ=48$, and $m \\widehat {HP}=65$. Find $m \\widehat {HJ}$.\nChoices:\n(A) 65\n(B) 120\n(C) 130\n(D) 155", + "choices": [ + "65", + "120", + "130", + "155" + ], + "answer": "130", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 467, + "img_width": 507, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "749": { + "question_id": "749", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AB = 7.0, AC = 5.0, AD = 3.0, then DE = ()\nChoices:\n(A) \\frac{15}{4}cm\n(B) \\frac{20}{3}cm\n(C) \\frac{15}{7}cm\n(D) \\frac{20}{7}cm", + "choices": [ + "\\frac{15}{4}cm", + "\\frac{20}{3}cm", + "\\frac{15}{7}cm", + "\\frac{20}{7}cm" + ], + "answer": "\\frac{20}{7}cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{15}{4}cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 181, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "751": { + "question_id": "751", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would most likely happen if Artemia was removed?\nChoices:\n(A) Seahorses would decrease\n(B) Rotifers would decrease\n(C) Mysids would decrease\n(D) Algae would decrease", + "choices": [ + "Seahorses would decrease", + "Rotifers would decrease", + "Mysids would decrease", + "Algae would decrease" + ], + "answer": "Seahorses would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Seahorses would decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 363, + "img_width": 862, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "753": { + "question_id": "753", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "755": { + "question_id": "755", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a polynomial", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "757": { + "question_id": "757", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 7.2\n(B) 8\n(C) 12\n(D) 15", + "choices": [ + "7.2", + "8", + "12", + "15" + ], + "answer": "7.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 165, + "img_width": 220, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "759": { + "question_id": "759", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 201, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "761": { + "question_id": "761", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the crayfish population if the Largemouth Bass and Northern Pike populations decrease?\nChoices:\n(A) Nothing\n(B) Decrease\n(C) Slightly Decrease\n(D) Increase", + "choices": [ + "Nothing", + "Decrease", + "Slightly Decrease", + "Increase" + ], + "answer": "Increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Nothing", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 319, + "img_width": 405, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "763": { + "question_id": "763", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny shiny balls. Subtract all purple objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "765": { + "question_id": "765", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "767": { + "question_id": "767", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of y?", + "choices": null, + "answer": "5", + "extraction": "25", + "prediction": "25", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 429, + "img_width": 483, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "769": { + "question_id": "769", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each blue ball represents one particle of solute. Which solution has a higher concentration of blue particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B", + "choices": [ + "neither; their concentrations are the same", + "Solution A", + "Solution B" + ], + "answer": "Solution A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "771": { + "question_id": "771", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram of a food chain below and on your knowledge of science. If the population of snakes increases, the population of frogs will most likely\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) None", + "choices": [ + "decrease", + "remain the same", + "increase", + "None" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "773": { + "question_id": "773", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, point D is on the extended line of AB, passing point D is the tangent of \u2299O, and the tangent point is C, if \u2220A = 25.0, then \u2220D = ()\nChoices:\n(A) 25\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 65\u00b0", + "choices": [ + "25\u00b0", + "40\u00b0", + "50\u00b0", + "65\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 163, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "775": { + "question_id": "775", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 724, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "777": { + "question_id": "777", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In rhombus LMPQ, $m \\angle Q L M=2 x^{2}-10$, $m \\angle Q P M=8 x$, and $M P=10$ . \r\nFind the perimeter of $LMPQ$\nChoices:\n(A) 10\n(B) 40\n(C) 70\n(D) 140", + "choices": [ + "10", + "40", + "70", + "140" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 177, + "img_width": 337, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "779": { + "question_id": "779", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the cardiac silhouette less than half the diameter of the diaphragm?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "medical image", + "grade": "college", + "img_height": 841, + "img_width": 1023, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "VQA-RAD", + "split": "testmini", + "task": "visual question answering" + }, + "781": { + "question_id": "781", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\triangle CDF$, $K$ is the centroid and $DK=16$. Find $CD$.\nChoices:\n(A) 9\n(B) 12\n(C) 18\n(D) 18", + "choices": [ + "9", + "12", + "18", + "18" + ], + "answer": "18", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 540, + "img_width": 461, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "783": { + "question_id": "783", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In order to measure the width of parallel river AB, \u2220ACB = 30.0, \u2220ADB = 60.0, CD = 60.0, then the width of the river AB is ()\nChoices:\n(A) 30m\n(B) 30\u221a{3}m\n(C) (30\u221a{3}+30)m\n(D) (30\u221a{3}-30)m", + "choices": [ + "30m", + "30\u221a{3}m", + "(30\u221a{3}+30)m", + "(30\u221a{3}-30)m" + ], + "answer": "30\u221a{3}m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "785": { + "question_id": "785", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Part of an ecosystem is shown in this diagram. Imagine the algae and floating plants are prevented from growing. How will that most likely affect this ecosystem?\nChoices:\n(A) The number of ducks will increase\n(B) The number of minnows will increase\n(C) There will be no effect on this ecosystem\n(D) The number of aquatic crustaceans will decrease", + "choices": [ + "The number of ducks will increase", + "The number of minnows will increase", + "There will be no effect on this ecosystem", + "The number of aquatic crustaceans will decrease" + ], + "answer": "The number of aquatic crustaceans will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The number of ducks will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "787": { + "question_id": "787", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of the zebra's stripes are horizontal?", + "choices": null, + "answer": "50", + "extraction": "90", + "prediction": "90", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "789": { + "question_id": "789", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the values of posse and mortar?", + "choices": null, + "answer": "10", + "extraction": "10", + "prediction": "10", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "791": { + "question_id": "791", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Given $V_s$ = 5V, $R_1$ = 1k\u03a9, $R_2$ = 2.2k\u03a9, $R_3$ = 2.2k\u03a9, $R_4$ = 1.5k\u03a9, and $R_L$ = 4.7k\u03a9. Determine the voltage and current across $R_L$. Answer in unit of V (3 sig.fig.).", + "choices": null, + "answer": "1.06", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 400, + "img_width": 444, + "language": "english", + "skills": [ + "algebraic reasoning", + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "793": { + "question_id": "793", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest Elo score for the agent using an offline RL algorithm?", + "choices": null, + "answer": "1578", + "extraction": "178", + "prediction": "178", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1922, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "795": { + "question_id": "795", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "75", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 601, + "img_width": 475, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "797": { + "question_id": "797", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing pattern in the picture?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 291, + "img_width": 386, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "799": { + "question_id": "799", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Ruth need to buy a baking dish, a casserole dish, and an ice cream scoop? (Unit: $)", + "choices": null, + "answer": "13", + "extraction": "13", + "prediction": "13", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 128, + "img_width": 229, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "801": { + "question_id": "801", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?'", + "choices": null, + "answer": "10", + "extraction": "9", + "prediction": "9", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 272, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "803": { + "question_id": "803", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "805": { + "question_id": "805", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the donut more than half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 434, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "807": { + "question_id": "807", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following leaf shapes would have the least amount of wind resistance and water loss?\nChoices:\n(A) Truncate\n(B) Acuminate\n(C) Rounded\n(D) Sagittate", + "choices": [ + "Truncate", + "Acuminate", + "Rounded", + "Sagittate" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Truncate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 508, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "809": { + "question_id": "809", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a group of horses, some individuals have a black coat and others have a reddish-brown coat. In this group, the gene for the coat color trait has two alleles. The allele for a black coat (L) is dominant over the allele for a reddish-brown coat (l).\nThis Punnett square shows a cross between two horses. What is the expected ratio of offspring with a reddish-brown coat to offspring with a black coat? Choose the most likely ratio.\nChoices:\n(A) 1:3\n(B) 4:0\n(C) 3:1\n(D) 0:4\n(E) 2:2", + "choices": [ + "1:3", + "4:0", + "3:1", + "0:4", + "2:2" + ], + "answer": "2:2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1:3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 241, + "img_width": 233, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "811": { + "question_id": "811", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A machine at the candy factory dispensed different numbers of lemon-flavored candies into various bags. What is the smallest number of lemon-flavored candies? (Unit: lemon-flavored candies)", + "choices": null, + "answer": "34", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "813": { + "question_id": "813", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest value on the X axis?", + "choices": null, + "answer": "30", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2264, + "img_width": 1768, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "815": { + "question_id": "815", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle N C L$\nChoices:\n(A) 60\n(B) 120\n(C) 240\n(D) 360", + "choices": [ + "60", + "120", + "240", + "360" + ], + "answer": "120", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 279, + "img_width": 367, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "817": { + "question_id": "817", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the straight line a \u2225 b, the point B is on the straight line b, and AB \u22a5 BC, \u22202 = 65.0, then the degree of \u22201 is ()\nChoices:\n(A) 65\u00b0\n(B) 25\u00b0\n(C) 35\u00b0\n(D) 45\u00b0", + "choices": [ + "65\u00b0", + "25\u00b0", + "35\u00b0", + "45\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 94, + "img_width": 171, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "819": { + "question_id": "819", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of $t$ in the parallelogram.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 13", + "choices": [ + "6", + "7", + "8", + "13" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 400, + "img_width": 428, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "821": { + "question_id": "821", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most of the people young men?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 360, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "823": { + "question_id": "823", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: You can see how organisms are interconnected from the diagram given. What will be the effect if all the Killer whales are removed?\nChoices:\n(A) The population of tuna will increase\n(B) Mouse will decrease in number\n(C) The phytoplankton will decrease\n(D) The grasshopper will die", + "choices": [ + "The population of tuna will increase", + "Mouse will decrease in number", + "The phytoplankton will decrease", + "The grasshopper will die" + ], + "answer": "The population of tuna will increase", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of tuna will increase", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "825": { + "question_id": "825", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metallic road bikes that are behind the large bus less than the number of small matte double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "827": { + "question_id": "827", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "D", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1138, + "img_width": 828, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "829": { + "question_id": "829", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which matchstick needs to be moved in order to create a square?\nChoices:\n(A) Top\n(B) Bottom\n(C) Left\n(D) Right\n(E) Not possible", + "choices": [ + "Top", + "Bottom", + "Left", + "Right", + "Not possible" + ], + "answer": "Left", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Top", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 396, + "img_width": 378, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "831": { + "question_id": "831", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An author recorded how many words she wrote in the past 3 days. How many words in total did the author write on Thursday and Friday? (Unit: words)", + "choices": null, + "answer": "679", + "extraction": "635", + "prediction": "635", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 156, + "img_width": 236, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "833": { + "question_id": "833", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Phenylalanine (Phe, 5) is a naturally occurring amino acid. What is the energy of interaction between its phenyl group and the electric dipole moment of a neighbouring peptide group? Take the distance between the groups as $4.0 \\mathrm{~nm}$ and treat the phenyl group as a benzene molecule. The magnitude of the dipole moment of the peptide group is $\\mu=1.3 \\mathrm{D}$ and the polarizability volume of benzene is $\\alpha^{\\prime}=1.04 \\times 10^{-29} \\mathrm{~m}^3$.", + "choices": null, + "answer": "-4.3", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 372, + "img_width": 474, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "835": { + "question_id": "835", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of people are wearing blue?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "837": { + "question_id": "837", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red motorbikes than big red choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "839": { + "question_id": "839", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many years have value less than 10%?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "841": { + "question_id": "841", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some friends compared the sizes of their stuffed animal collections. What is the median of the numbers?'", + "choices": null, + "answer": "9", + "extraction": "9", + "prediction": "9", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 265, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "843": { + "question_id": "843", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Red?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 752, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "845": { + "question_id": "845", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 390, + "img_width": 550, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "847": { + "question_id": "847", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which function grows the fastest as x increases?\nChoices:\n(A) red\n(B) purple\n(C) blue", + "choices": [ + "red", + "purple", + "blue" + ], + "answer": "red", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "red", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1294, + "img_width": 1706, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "849": { + "question_id": "849", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The 4 8x8 images shown below are encoded with JPEG coding. Based on their expected DCT (Discrete Cosine Transform) coefficients, Which image has the most non-zero AC coefficients? (a): Image A, (b): Image B, (c): Image C, (d): Image D.\nChoices:\n(A) (c)\n(B) (d)\n(C) (a)\n(D) (b)\n(E) (e)", + "choices": [ + "(c)", + "(d)", + "(a)", + "(b)", + "(e)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 282, + "img_width": 940, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "851": { + "question_id": "851", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the net concessional disbursements from imf greater than 32000000 US$?", + "choices": null, + "answer": "2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1139, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "853": { + "question_id": "853", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, \u2220BAD = 120.0, the length of the diagonal AC is 3.0, then the perimeter of the diamond ABCD is ()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12", + "choices": [ + "3", + "6", + "9", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 98, + "img_width": 169, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "855": { + "question_id": "855", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$ so that $a \u2225 b$.\nChoices:\n(A) 2.5\n(B) 14\n(C) 15\n(D) 16", + "choices": [ + "2.5", + "14", + "15", + "16" + ], + "answer": "14", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 536, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "857": { + "question_id": "857", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "859": { + "question_id": "859", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "27", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 603, + "img_width": 750, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "861": { + "question_id": "861", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than Gray?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 680, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "863": { + "question_id": "863", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rhode Island have the lowest value in the USA ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "865": { + "question_id": "865", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Hot Pink have the lowest value?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 512, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "867": { + "question_id": "867", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A food industry researcher compiled the revenues of several pizzerias. How much did Dan's Deep Dish make from pizza sales? (Unit: $)", + "choices": null, + "answer": "22", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 465, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "869": { + "question_id": "869", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large yellow matte cubes. Subtract all metal things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "871": { + "question_id": "871", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 200, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "873": { + "question_id": "873", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many groups of bars contain at least one bar with value smaller than 40?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "875": { + "question_id": "875", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow things. Subtract all blue cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "877": { + "question_id": "877", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms squad and warm?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "879": { + "question_id": "879", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large gray rubber things. Subtract all small blue spheres. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "881": { + "question_id": "881", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the population of grasshopper decreases, the population of mouse will most likely do what?\nChoices:\n(A) decrease\n(B) remain the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remain the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "883": { + "question_id": "883", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "15", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 207, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "885": { + "question_id": "885", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Grayson counted the number of pieces of pepperoni on each pizza he made. What is the smallest number of pieces of pepperoni? (Unit: pieces of pepperoni)", + "choices": null, + "answer": "18", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 136, + "img_width": 225, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "887": { + "question_id": "887", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O. If \u2220ABC = 70.0, then the degree of \u2220AOC is equal to ()\nChoices:\n(A) 140\u00b0\n(B) 130\u00b0\n(C) 120\u00b0\n(D) 110\u00b0", + "choices": [ + "140\u00b0", + "130\u00b0", + "120\u00b0", + "110\u00b0" + ], + "answer": "140\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "140\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 106, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "889": { + "question_id": "889", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 472, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "891": { + "question_id": "891", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "893": { + "question_id": "893", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the blue function as x approaches negative infinity?", + "choices": null, + "answer": "0", + "extraction": "-4", + "prediction": "-4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 331, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "895": { + "question_id": "895", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the lowest Audio-Audio Similarity and Text-Audio Similarity scores overall?\nChoices:\n(A) MusicLDM (mix-up)\n(B) MusicLDM (original)\n(C) MusicLDM (BLM)\n(D) MusicLDM (BAM)\n(E) MuBERT", + "choices": [ + "MusicLDM (mix-up)", + "MusicLDM (original)", + "MusicLDM (BLM)", + "MusicLDM (BAM)", + "MuBERT" + ], + "answer": "MuBERT", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "MusicLDM (mix-up)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "violin plot", + "grade": "college", + "img_height": 682, + "img_width": 1882, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "897": { + "question_id": "897", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a calculator to find the measure of $\u2220J$ to the nearest degree.\nChoices:\n(A) 33\n(B) 40\n(C) 50\n(D) 57", + "choices": [ + "33", + "40", + "50", + "57" + ], + "answer": "40", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "33", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 223, + "img_width": 352, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "899": { + "question_id": "899", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number comes next?", + "choices": null, + "answer": "2123", + "extraction": "1357", + "prediction": "1357", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 185, + "img_width": 406, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "901": { + "question_id": "901", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all shiny spheres. Subtract all big red matte spheres. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "903": { + "question_id": "903", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if \u2220ABC = 30.0, then the degree of \u2220AOC is ()\nChoices:\n(A) 30\u00b0\n(B) 45\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "30\u00b0", + "45\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "60\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "905": { + "question_id": "905", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large red cars behind the metal car less than the number of blue matte tandem bikes that are behind the big blue rubber utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "907": { + "question_id": "907", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When the military expenditure value was lower than 0.2%?", + "choices": null, + "answer": "1970", + "extraction": "1970", + "prediction": "1970", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "909": { + "question_id": "909", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b3ABC, DE \u2225 BC, if AD = 1.0, DB = 2.0, then the value of \\frac ADAB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{1}{4}\n(C) \\frac{1}{3}\n(D) \\frac{1}{2}", + "choices": [ + "\\frac{2}{3}", + "\\frac{1}{4}", + "\\frac{1}{3}", + "\\frac{1}{2}" + ], + "answer": "\\frac{1}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 132, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "911": { + "question_id": "911", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the smaller picture below the larger picture?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "913": { + "question_id": "913", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 763, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "915": { + "question_id": "915", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the Lion population if the Gum Tree population decreased?\nChoices:\n(A) Unable to determine.\n(B) Nothing would happen.\n(C) It would also decrease.\n(D) It would increase.", + "choices": [ + "Unable to determine.", + "Nothing would happen.", + "It would also decrease.", + "It would increase." + ], + "answer": "It would also decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Unable to determine.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 740, + "img_width": 528, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "917": { + "question_id": "917", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of the number of procedures to register a business in 2004 to that in 2007?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 939, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "919": { + "question_id": "919", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold more than 3 units in at least one store?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "921": { + "question_id": "921", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x to the nearest tenth. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 5\n(B) 8.1\n(C) 10.3\n(D) 21.6", + "choices": [ + "5", + "8.1", + "10.3", + "21.6" + ], + "answer": "21.6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 170, + "img_width": 226, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "923": { + "question_id": "923", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model achieves the highest score in terms of Rec?\nChoices:\n(A) Transformers Agent (GPT-4)\n(B) LLaMA-Adapter v2-7B\n(C) LLaVA-7B\n(D) Otter-9B \n(E) MM-ReAct-GPT-3.5\n(F) LLaVA-13B (LLaMA-2)\n(G) MM-ReAct-GPT-4", + "choices": [ + "Transformers Agent (GPT-4)", + "LLaMA-Adapter v2-7B", + "LLaVA-7B", + "Otter-9B ", + "MM-ReAct-GPT-3.5", + "LLaVA-13B (LLaMA-2)", + "MM-ReAct-GPT-4" + ], + "answer": "LLaVA-13B (LLaMA-2)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Transformers Agent (GPT-4)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1056, + "img_width": 1910, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "925": { + "question_id": "925", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Haley went to the store. She bought 3+9/10 pounds of pumpernickel bread crumbs. How much did she spend? (Unit: $)", + "choices": null, + "answer": "19.5", + "extraction": "15.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 130, + "img_width": 334, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "927": { + "question_id": "927", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAB\u7684\u5782\u76f4\u5e73\u5206\u7ebf\u4ea4AB\u4e8e\u70b9D\uff0c\u4ea4BC\u4e8e\u70b9E\uff0c\u8fde\u63a5AE\uff0e\u82e5AB\uff1d6\uff0c\u25b3ACE\u7684\u5468\u957f\u4e3a13\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 19\n(B) 16\n(C) 29\n(D) 18", + "choices": [ + "19", + "16", + "29", + "18" + ], + "answer": "19", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "19", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 152, + "img_width": 199, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "929": { + "question_id": "929", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Tim need to buy a mystery game and a toy rocket? (Unit: $)", + "choices": null, + "answer": "85", + "extraction": "32", + "prediction": "32", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 226, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "931": { + "question_id": "931", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u25b3ABC is the inscribed triangle of \u2299O, AB is the diameter of \u2299O, point D is a point on \u2299O, if \u2220ACD = 40.0, then the size of \u2220BAD is ()\nChoices:\n(A) 35\u00b0\n(B) 50\u00b0\n(C) 40\u00b0\n(D) 60\u00b0", + "choices": [ + "35\u00b0", + "50\u00b0", + "40\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "933": { + "question_id": "933", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much money does Hector need to buy a European vacation package and an Australian vacation package? (Unit: $)", + "choices": null, + "answer": "9606", + "extraction": "1796", + "prediction": "1796", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 344, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "935": { + "question_id": "935", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAD\uff1d6\uff0cAB\uff1d4\uff0cDE\u5e73\u5206\u2220ADC\u4ea4BC\u4e8e\u70b9E\uff0c\u5219BE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 140, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "937": { + "question_id": "937", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "939": { + "question_id": "939", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would be most affected if the clams all died?\nChoices:\n(A) squid\n(B) lantern fish\n(C) octopus\n(D) sea horse", + "choices": [ + "squid", + "lantern fish", + "octopus", + "sea horse" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "squid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "941": { + "question_id": "941", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which is the next number in the series?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 327, + "img_width": 271, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "943": { + "question_id": "943", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1258, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "945": { + "question_id": "945", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 451, + "img_width": 610, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "947": { + "question_id": "947", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBC\u2225DE\uff0c\u2220A\uff1d45\u00b0\uff0c\u2220C\uff1d110\u00b0\uff0c\u5219\u2220AED\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 95\u00b0\n(B) 105\u00b0\n(C) 115\u00b0\n(D) 125\u00b0", + "choices": [ + "95\u00b0", + "105\u00b0", + "115\u00b0", + "125\u00b0" + ], + "answer": "115\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "95\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "949": { + "question_id": "949", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the combined percentage of Lowest ROI and Medium ROI in SEO?", + "choices": null, + "answer": "56", + "extraction": "31", + "prediction": "31", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "951": { + "question_id": "951", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $x$.\nChoices:\n(A) 10.25\n(B) 12.75\n(C) 18.75\n(D) 25.5", + "choices": [ + "10.25", + "12.75", + "18.75", + "25.5" + ], + "answer": "12.75", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10.25", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 427, + "img_width": 487, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "953": { + "question_id": "953", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of trees have leaves?", + "choices": null, + "answer": "50", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "955": { + "question_id": "955", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0e\u70b9O\u662f\u6b63\u4e94\u8fb9\u5f62ABCDE\u7684\u4e2d\u5fc3\uff0c\u2299O\u662f\u6b63\u4e94\u8fb9\u5f62\u7684\u5916\u63a5\u5706\uff0c\u2220ADE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 32\u00b0\n(C) 36\u00b0\n(D) 40\u00b0", + "choices": [ + "30\u00b0", + "32\u00b0", + "36\u00b0", + "40\u00b0" + ], + "answer": "36\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 136, + "img_width": 136, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "957": { + "question_id": "957", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big brown buss behind the gray matte aeroplane greater than the number of yellow shiny scooters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "959": { + "question_id": "959", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The teachers at an elementary school counted how many desks they had in their classrooms. What is the median of the numbers?'", + "choices": null, + "answer": "32", + "extraction": "32", + "prediction": "32", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 218, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "961": { + "question_id": "961", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the lowest value in blue bar?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "963": { + "question_id": "963", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For what x does f reach its local maximum?", + "choices": null, + "answer": "3", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 397, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "965": { + "question_id": "965", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: whats the lowest number yard line that you can see?", + "choices": null, + "answer": "30", + "extraction": "30", + "prediction": "30", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 690, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "967": { + "question_id": "967", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the amount earned from national visitors greater than the average amount earned from national visitors taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1146, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "969": { + "question_id": "969", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Yellow Green have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "971": { + "question_id": "971", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can the boy reach the highest book?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "973": { + "question_id": "973", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many zeros does this function have?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 2039, + "img_width": 2560, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "975": { + "question_id": "975", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte objects. Subtract all blue metallic objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "977": { + "question_id": "977", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5AB\u2225CD\uff0cAF\u4e0eCD\u4ea4\u4e8e\u70b9E\uff0cBE\u22a5AF\uff0c\u2220B\uff1d65\u00b0\uff0c\u5219\u2220DEF\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 65\u00b0\n(B) 5\u00b0\n(C) 15\u00b0\n(D) 25\u00b0", + "choices": [ + "65\u00b0", + "5\u00b0", + "15\u00b0", + "25\u00b0" + ], + "answer": "25\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 129, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "979": { + "question_id": "979", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9079", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 279, + "img_width": 634, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "981": { + "question_id": "981", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of 2002, 2003 and 2004?", + "choices": null, + "answer": "70.4", + "extraction": "6006.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "983": { + "question_id": "983", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest accuracy reported in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "985": { + "question_id": "985", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the smallest percentage value recorded in the chart?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "987": { + "question_id": "987", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A cross-section of an airplane wing is shown. Measurements of the thickness of the wing, in centimeters, at 20-centimeter intervals are 5.8, 20.3, 26.7, 29.0, 27.6, 27.3, 23.8, 20.5, 15.1, 8.7, and 2.8. Use the Midpoint Rule to estimate the area of the wing's cross-section.", + "choices": null, + "answer": "4232", + "extraction": "200", + "prediction": "200", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 338, + "img_width": 946, + "language": "english", + "skills": [ + "arithmetic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "989": { + "question_id": "989", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people like the most preferred object in the whole chart?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "991": { + "question_id": "991", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which functions have no zeros?\nChoices:\n(A) green and red\n(B) blue and orange\n(C) purple and grey", + "choices": [ + "green and red", + "blue and orange", + "purple and grey" + ], + "answer": "purple and grey", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "green and red", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 383, + "img_width": 616, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "993": { + "question_id": "993", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny yellow matte cylinders. Subtract all small blue rubber spheres. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "995": { + "question_id": "995", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the sum of the two bars in 18 to 21?", + "choices": null, + "answer": "18.27", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "997": { + "question_id": "997", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is kx^2/2 larger than E at x=0?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 197, + "img_width": 537, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "999": { + "question_id": "999", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: The magnitude of the acceleration vector a is $10 \\mathrm{~cm} / \\mathrm{s}^2$. Use the figure to estimate the tangential components of $\\mathbf{a}$.", + "choices": null, + "answer": "4.5", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 484, + "img_width": 478, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "2": { + "question_id": "2", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the total volume of the measuring cup? (Unit: g)", + "choices": null, + "answer": "1000", + "extraction": "1000", + "prediction": "1000", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 768, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "4": { + "question_id": "4", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large rubber spheres. Subtract all big shiny cylinders. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "6": { + "question_id": "6", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, if CB = 4.0, DB = 7.0, and D is the midpoint of AC, then the length of AC is ()\nChoices:\n(A) 6cm\n(B) 7cm\n(C) 4cm\n(D) 5cm", + "choices": [ + "6cm", + "7cm", + "4cm", + "5cm" + ], + "answer": "6cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 30, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "8": { + "question_id": "8", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny gray bicycles that are on the left side of the brown metal sedan greater than the number of things that are to the left of the tiny green bicycle?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "10": { + "question_id": "10", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object comes next?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "E", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 418, + "img_width": 376, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "12": { + "question_id": "12", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer metallic fighters than rubber objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "14": { + "question_id": "14", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "16": { + "question_id": "16", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many items sold less than 5 units in at least one store?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "18": { + "question_id": "18", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The passage below describes an experiment. Read the passage and then follow the instructions below.\n\nLinda applied a thin layer of wax to the underside of her snowboard and rode the board straight down a hill. Then, she removed the wax and rode the snowboard straight down the hill again. She repeated the rides four more times, alternating whether she rode with a thin layer of wax on the board or not. Her friend Bob timed each ride. Linda and Bob calculated the average time it took to slide straight down the hill on the snowboard with wax compared to the average time on the snowboard without wax.\nFigure: snowboarding down a hill. Identify the question that Linda and Bob's experiment can best answer.\nChoices:\n(A) Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?\n(B) Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "choices": [ + "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?" + ], + "answer": "Does Linda's snowboard slide down a hill in less time when it has a layer of wax or when it does not have a layer of wax?", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Does Linda's snowboard slide down a hill in less time when it has a thin layer of wax or a thick layer of wax?", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "elementary school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "20": { + "question_id": "20", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "22": { + "question_id": "22", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 785, + "img_width": 555, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "24": { + "question_id": "24", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 709, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "26": { + "question_id": "26", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Black greater than Deep Sky Blue?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 761, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "28": { + "question_id": "28", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{AB}$ is a diameter, $AC=8$ inches, and $BC=15$ inches. Find the radius of the circle.\nChoices:\n(A) 7.5\n(B) 8\n(C) 8.5\n(D) 17", + "choices": [ + "7.5", + "8", + "8.5", + "17" + ], + "answer": "8.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 431, + "img_width": 519, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "30": { + "question_id": "30", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the two chords AB and CD in the circle intersect at E, \u2220D = 35.0, \u2220AEC = 105.0, then \u2220C = ()\nChoices:\n(A) 60\u00b0\n(B) 70\u00b0\n(C) 80\u00b0\n(D) 85\u00b0", + "choices": [ + "60\u00b0", + "70\u00b0", + "80\u00b0", + "85\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 113, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "32": { + "question_id": "32", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1dAC\uff0c\u2220CAB\uff1d40\u00b0\uff0c\u5219\u2220D\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 100, + "img_width": 168, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "34": { + "question_id": "34", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous at each point?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "36": { + "question_id": "36", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 800, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "38": { + "question_id": "38", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 6?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "40": { + "question_id": "40", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown blocks. Subtract all large blue rubber things. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "42": { + "question_id": "42", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 539, + "img_width": 401, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "44": { + "question_id": "44", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend? (Unit: $)", + "choices": null, + "answer": "18", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 226, + "img_width": 305, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "46": { + "question_id": "46", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the population of adult spiders if predator ate all the spider eggs?\nChoices:\n(A) Adult spider population would remain the same\n(B) Adult spider population would double.\n(C) Adults spider population would decrease\n(D) Adult spider population would increase.", + "choices": [ + "Adult spider population would remain the same", + "Adult spider population would double.", + "Adults spider population would decrease", + "Adult spider population would increase." + ], + "answer": "Adults spider population would decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Adult spider population would remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 829, + "img_width": 1024, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "48": { + "question_id": "48", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle 3$.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62", + "choices": [ + "28", + "38", + "52", + "62" + ], + "answer": "38", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 426, + "img_width": 596, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "50": { + "question_id": "50", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, what would likely happen if the number of large roach would decrease?\nChoices:\n(A) The population of steelheads would decrease.\n(B) The population of stickleback fry would increase.\n(C) The population of predatory insects would increase.\n(D) The population of predatory insects would decrease.", + "choices": [ + "The population of steelheads would decrease.", + "The population of stickleback fry would increase.", + "The population of predatory insects would increase.", + "The population of predatory insects would decrease." + ], + "answer": "The population of predatory insects would decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "The population of steelheads would decrease.", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 600, + "img_width": 633, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "52": { + "question_id": "52", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red metallic spheres. Subtract all big brown matte things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "54": { + "question_id": "54", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, the ratio of the length of line AB to the length of line AC is 2 : 5. If AC = 25, what is the length of line AB?\nChoices:\n(A) 8\n(B) 10\n(C) 15\n(D) 18\n(E) 20", + "choices": [ + "8", + "10", + "15", + "18", + "20" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "8", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "56": { + "question_id": "56", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "6", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 295, + "img_width": 202, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "58": { + "question_id": "58", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 760, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "60": { + "question_id": "60", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "22", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 381, + "img_width": 477, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "62": { + "question_id": "62", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cE\uff0cF\u5206\u522b\u662f\u83f1\u5f62ABCD\u7684\u8fb9AB\uff0cAD\u7684\u4e2d\u70b9\uff0c\u4e14AB\uff1d5\uff0cAC\uff1d6\uff0e\u5219EF\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 6", + "choices": [ + "4", + "5", + "5.5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 138, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "64": { + "question_id": "64", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagrams below show two pure samples of gas in identical closed, rigid containers. Each colored ball represents one gas particle. Both samples have the same number of particles. Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) neither; the samples have the same temperature\n(B) sample A\n(C) sample B", + "choices": [ + "neither; the samples have the same temperature", + "sample A", + "sample B" + ], + "answer": "sample A", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; the samples have the same temperature", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "elementary school", + "img_height": 405, + "img_width": 550, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "66": { + "question_id": "66", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "A", + "extraction": "E", + "prediction": "E", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 562, + "img_width": 320, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "68": { + "question_id": "68", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5c06\u4e00\u6839\u957f\u5ea6\u4e3a16cm\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53476cm\u81f3D\u70b9\uff08\u5982\u56fe\uff09\uff0c\u5219\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 8cm", + "choices": [ + "2cm", + "4cm", + "6cm", + "8cm" + ], + "answer": "4cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 252, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "70": { + "question_id": "70", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "8", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2600, + "img_width": 2266, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "72": { + "question_id": "72", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A real estate agent drove around the neighborhood and counted the number of houses on each block. How many blocks have exactly 36 houses? (Unit: blocks)", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "74": { + "question_id": "74", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the difference of largest and smallest bar?", + "choices": null, + "answer": "47.6", + "extraction": "100.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "76": { + "question_id": "76", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to fish if pelicans increase?\nChoices:\n(A) decrease\n(B) nothing\n(C) increase\n(D) none of the above", + "choices": [ + "decrease", + "nothing", + "increase", + "none of the above" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "78": { + "question_id": "78", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the missing value.", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 394, + "img_width": 1062, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "80": { + "question_id": "80", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, what will happen if all the algae died due to pesticides?\nChoices:\n(A) Crabs and limpets will decrease\n(B) Dolphins will increase\n(C) Sea gulls will become extinct\n(D) Star fish will increase", + "choices": [ + "Crabs and limpets will decrease", + "Dolphins will increase", + "Sea gulls will become extinct", + "Star fish will increase" + ], + "answer": "Crabs and limpets will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Crabs and limpets will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 199, + "img_width": 372, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "82": { + "question_id": "82", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A square is inscribed in a circle of area 18$\\pi$ square units. Find the length of a side of the square.\nChoices:\n(A) 3\n(B) 3 \\sqrt 2\n(C) 6\n(D) 6 \\sqrt 2", + "choices": [ + "3", + "3 \\sqrt 2", + "6", + "6 \\sqrt 2" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 202, + "img_width": 200, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "84": { + "question_id": "84", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: ABCD is a square. Inscribed Circle center is O. Find the the angle of \u2220AMK. Return the numeric value.", + "choices": null, + "answer": "130.9", + "extraction": "120.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 1220, + "img_width": 1194, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "86": { + "question_id": "86", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which model has the highest Acc score when Pretrain Loss is equal to 1.80?\nChoices:\n(A) ICL\n(B) SFT\n(C) SFT 1/8\n(D) RFT k=100\n(E) RFT k=25\n(F) RET k=6\n(G) RFT U13B", + "choices": [ + "ICL", + "SFT", + "SFT 1/8", + "RFT k=100", + "RFT k=25", + "RET k=6", + "RFT U13B" + ], + "answer": "RFT U13B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ICL", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 1046, + "img_width": 1734, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "88": { + "question_id": "88", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A square is tangent to a line at point P in the figure above. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 277, + "img_width": 442, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "90": { + "question_id": "90", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow matte blocks. Subtract all tiny brown cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "92": { + "question_id": "92", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the windows have a geometric shape that most houses have?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "94": { + "question_id": "94", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cD\u4e3a\u25b3ABC\u5185\u4e00\u70b9\uff0cCD\u5e73\u5206\u2220ACB\uff0cBD\u22a5CD\uff0c\u2220A\uff1d\u2220ABD\uff0c\u82e5\u2220DBC\uff1d54\u00b0\uff0c\u5219\u2220A\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 44\u00b0\n(C) 27\u00b0\n(D) 54\u00b0", + "choices": [ + "36\u00b0", + "44\u00b0", + "27\u00b0", + "54\u00b0" + ], + "answer": "27\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 74, + "img_width": 160, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "96": { + "question_id": "96", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: How many times Dissatisfied more than satisfied?", + "choices": null, + "answer": "3.9", + "extraction": "3.8", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 328, + "img_width": 186, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "98": { + "question_id": "98", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the value of the square in the figure.", + "choices": null, + "answer": "2", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 506, + "img_width": 900, + "language": "english", + "skills": [ + "logical reasoning", + "algebraic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "100": { + "question_id": "100", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of all the values in the ruling group?", + "choices": null, + "answer": "12", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "102": { + "question_id": "102", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The shape is made of unit squares. What is the area of the shape?", + "choices": null, + "answer": "6", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 156, + "img_width": 106, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "104": { + "question_id": "104", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?", + "choices": null, + "answer": "0.8", + "extraction": "1.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "106": { + "question_id": "106", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 1?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "108": { + "question_id": "108", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Find out the average of the bottom two countries ??", + "choices": null, + "answer": "51.04", + "extraction": "40.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "110": { + "question_id": "110", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of two lowest bar is greater then the largest bar?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "112": { + "question_id": "112", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big cyan airliners less than the number of gray shiny utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "114": { + "question_id": "114", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, KL is tangent to $\\odot M$ at K. Find the value of x.\nChoices:\n(A) 6.00\n(B) 9.45\n(C) 18.9\n(D) 37.8", + "choices": [ + "6.00", + "9.45", + "18.9", + "37.8" + ], + "answer": "9.45", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.00", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 273, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "116": { + "question_id": "116", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf has the most veins?\nChoices:\n(A) Acuminate\n(B) Truncate\n(C) Mucronate\n(D) Acute", + "choices": [ + "Acuminate", + "Truncate", + "Mucronate", + "Acute" + ], + "answer": "Acuminate", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Acuminate", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 187, + "img_width": 350, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "118": { + "question_id": "118", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the maximum value of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "120": { + "question_id": "120", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the degree of this function?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 320, + "img_width": 312, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "122": { + "question_id": "122", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow regular buss than small yellow metallic school buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "124": { + "question_id": "124", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of leaf arrangement consists of at least three leaves attached to a node.\nChoices:\n(A) Whorled\n(B) Simple\n(C) Opposite\n(D) Alternate", + "choices": [ + "Whorled", + "Simple", + "Opposite", + "Alternate" + ], + "answer": "Whorled", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Whorled", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 576, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "126": { + "question_id": "126", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "9", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 800, + "img_width": 623, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "128": { + "question_id": "128", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large metal blocks. Subtract all yellow cylinders. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "130": { + "question_id": "130", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1403, + "img_width": 1063, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "132": { + "question_id": "132", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u57284\u00d74\u7684\u6b63\u65b9\u5f62\u7f51\u683c\u4e2d\uff0c\u6bcf\u4e2a\u5c0f\u6b63\u65b9\u5f62\u7684\u8fb9\u957f\u5747\u4e3a1\uff0c\u70b9A\uff0cB\uff0cC\u90fd\u5728\u683c\u70b9\u4e0a\uff0cAD\u22a5BC\u4e8eD\uff0c\u5219AD\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) \\frac{7}{3}", + "choices": [ + "1", + "1.5", + "2", + "\\frac{7}{3}" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "134": { + "question_id": "134", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: People can use the engineering-design process to develop solutions to problems. One step in the process is testing if a potential solution meets the requirements of the design.\nThe passage below describes how the engineering-design process was used to test a solution to a problem. Read the passage. Then answer the question below.\n\nCooper was a landscape architect who was hired to design a new city park. The city council wanted the park to have space for outdoor concerts and to have at least 20% of the park shaded by trees. Cooper thought the concert area should be at least 150 meters from the road so traffic noise didn't interrupt the music. He developed three possible designs for the park with the concert area in a different location in each design. Then, he tested each design by measuring the distance between the road and the concert area.\nFigure: studying an architect's design. Which of the following could Cooper's test show?\nChoices:\n(A) if at least 20% of the park would be shaded by trees in each design\n(B) which design would have the least traffic noise in the concert area\n(C) which design would have the greatest distance between the concert area and the road", + "choices": [ + "if at least 20% of the park would be shaded by trees in each design", + "which design would have the least traffic noise in the concert area", + "which design would have the greatest distance between the concert area and the road" + ], + "answer": "which design would have the greatest distance between the concert area and the road", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "if at least 20% of the park would be shaded by trees in each design", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "high school", + "img_height": 232, + "img_width": 302, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "136": { + "question_id": "136", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest value of blue bar?", + "choices": null, + "answer": "64", + "extraction": "6", + "prediction": "6", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 443, + "img_width": 415, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "138": { + "question_id": "138", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sandwich cut in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "140": { + "question_id": "140", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which food has the least carbs?\nChoices:\n(A) soup\n(B) water\n(C) sandwich\n(D) buns", + "choices": [ + "soup", + "water", + "sandwich", + "buns" + ], + "answer": "soup", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "soup", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "142": { + "question_id": "142", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it split in half?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 425, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "144": { + "question_id": "144", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Natalie buys 4.6 kilograms of turmeric. What is the total cost? (Unit: $)", + "choices": null, + "answer": "13.8", + "extraction": "18.4", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 162, + "img_width": 210, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "146": { + "question_id": "146", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Kimberly's classmates revealed how many science articles they read. What is the range of the numbers?'", + "choices": null, + "answer": "4", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 286, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "148": { + "question_id": "148", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape has the smallest base?\nChoices:\n(A) Hastate\n(B) Cordate\n(C) Sagittate\n(D) Decurrent", + "choices": [ + "Hastate", + "Cordate", + "Sagittate", + "Decurrent" + ], + "answer": "Decurrent", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Hastate", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 161, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "150": { + "question_id": "150", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are three points on \u2299O, and the straight line CD and \u2299O are tangent to point C. If \u2220DCB = 40.0, then the degree of \u2220CAB is ()\nChoices:\n(A) 40\u00b0\n(B) 50\u00b0\n(C) 80\u00b0\n(D) 100\u00b0", + "choices": [ + "40\u00b0", + "50\u00b0", + "80\u00b0", + "100\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 144, + "img_width": 110, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "152": { + "question_id": "152", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfl1\u2225l2\uff0c\u5c06\u542b30\u00b0\u89d2\u7684\u76f4\u89d2\u4e09\u89d2\u677f\u6309\u5982\u56fe\u65b9\u5f0f\u653e\u7f6e\uff0c\u76f4\u89d2\u9876\u70b9\u5728l2\u4e0a\uff0c\u82e5\u22201\uff1d76\u00b0\uff0c\u5219\u22202\uff1d\uff08\uff09\nChoices:\n(A) 36\u00b0\n(B) 45\u00b0\n(C) 44\u00b0\n(D) 64\u00b0", + "choices": [ + "36\u00b0", + "45\u00b0", + "44\u00b0", + "64\u00b0" + ], + "answer": "44\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "36\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 208, + "img_width": 229, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "154": { + "question_id": "154", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this an odd function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "156": { + "question_id": "156", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the limit of the as x approaches 1 from the left side?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 291, + "img_width": 327, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "158": { + "question_id": "158", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 685, + "img_width": 911, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "160": { + "question_id": "160", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 270, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "162": { + "question_id": "162", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The bird watcher counted the number of birds in each flock that passed overhead. How many flocks had at least 17 birds but fewer than 33 birds? (Unit: flocks)", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 202, + "img_width": 117, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "164": { + "question_id": "164", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in \u25b1ABCD, CE \u22a5 AB, point E is the foot of perpendicular, if \u2220D = 55.0, then \u2220BCE = ()\nChoices:\n(A) 55\u00b0\n(B) 35\u00b0\n(C) 25\u00b0\n(D) 30\u00b0", + "choices": [ + "55\u00b0", + "35\u00b0", + "25\u00b0", + "30\u00b0" + ], + "answer": "35\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "55\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 84, + "img_width": 161, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "166": { + "question_id": "166", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which Shape is missing?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E\n(F) F", + "choices": [ + "A", + "B", + "C", + "D", + "E", + "F" + ], + "answer": "B", + "extraction": "A", + "prediction": "A", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 816, + "img_width": 2028, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "168": { + "question_id": "168", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the Hue-Saturation subspace shown in Fig. Q2 is a perfect circle and that colors A, B and C can be represented as the 3 points shown in the subspace. Which color has the smallest saturation coefficient?\nChoices:\n(A) (c)\n(B) (a)\n(C) (e)\n(D) (d)\n(E) (b)", + "choices": [ + "(c)", + "(a)", + "(e)", + "(d)", + "(b)" + ], + "answer": "(b)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(c)", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 454, + "img_width": 414, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "170": { + "question_id": "170", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: f(-1) is ____ f(0).\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "172": { + "question_id": "172", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Dark Salmon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 524, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "174": { + "question_id": "174", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny cyan suvs that are behind the aeroplane than cyan utility bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "176": { + "question_id": "176", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $RS$ if $\\triangle QRS$ is an equilateral triangle.\nChoices:\n(A) 0.5\n(B) 1\n(C) 1.5\n(D) 2", + "choices": [ + "0.5", + "1", + "1.5", + "2" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 292, + "img_width": 305, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "178": { + "question_id": "178", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u5728\u2220FBD\u7684\u4e24\u6761\u8fb9BF\u3001BD\u4e0a\uff0cBE\u5e73\u5206\u2220FBD\uff0cCE\u5e73\u5206\u2220ACD\uff0c\u8fde\u63a5AE\uff0c\u82e5\u2220BEC\uff1d35\u00b0\uff0c\u5219\u2220FAE\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 45\u00b0\n(C) 55\u00b0\n(D) 65\u00b0", + "choices": [ + "35\u00b0", + "45\u00b0", + "55\u00b0", + "65\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 99, + "img_width": 129, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "180": { + "question_id": "180", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny brown cylinders. Subtract all tiny brown objects. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "182": { + "question_id": "182", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Yellow?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 589, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "184": { + "question_id": "184", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values smaller than 0?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "186": { + "question_id": "186", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, CD is a plane mirror, the light is emitted from point A, reflected by point E on CD, and irradiated to point B. If the incident angle is \u03b1, AC \u22a5 CD, BD \u22a5 CD, the feet of perpendicular are C, D, and AC = 3.0, BD = 6.0, CD = 10.0, then the length of the line segment ED is ()\nChoices:\n(A) \\frac{20}{3}\n(B) \\frac{10}{3}\n(C) 7\n(D) \\frac{14}{3}", + "choices": [ + "\\frac{20}{3}", + "\\frac{10}{3}", + "7", + "\\frac{14}{3}" + ], + "answer": "\\frac{20}{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{20}{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 112, + "img_width": 183, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "188": { + "question_id": "188", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many methods in the table achieve an A-847 score higher than 20.0?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 634, + "img_width": 2226, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "190": { + "question_id": "190", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 132, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "192": { + "question_id": "192", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the diameter CD of \u2299O crosses the midpoint G of chord EF, \u2220DCF = 20.0, then \u2220EOD is equal to ()\nChoices:\n(A) 10\u00b0\n(B) 20\u00b0\n(C) 40\u00b0\n(D) 80\u00b0", + "choices": [ + "10\u00b0", + "20\u00b0", + "40\u00b0", + "80\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 127, + "img_width": 101, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "194": { + "question_id": "194", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: On average, how many people can commute on this vehicle?", + "choices": null, + "answer": "50", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 408, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "196": { + "question_id": "196", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\u6240\u793a\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u5df2\u77e5\u70b9D\uff0cE\uff0cF\u5206\u522b\u4e3a\u8fb9BC\uff0cAD\uff0cCE\u7684\u4e2d\u70b9\uff0c\u4e14S\u25b3ABC\uff1d4cm2\uff0c\u5219S\u25b3DEF\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 2cm2\n(B) 1cm2\n(C) 0.5cm2\n(D) 0.25cm2", + "choices": [ + "2cm2", + "1cm2", + "0.5cm2", + "0.25cm2" + ], + "answer": "0.5cm2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2cm2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 81, + "img_width": 110, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "198": { + "question_id": "198", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the missing value.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 756, + "img_width": 890, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "200": { + "question_id": "200", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 404, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "202": { + "question_id": "202", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "204": { + "question_id": "204", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: \u0627\u0632 \u0633\u0645\u062a \u0631\u0627\u0633\u062a \u062a\u0635\u0648\u06cc\u0631 \u062f\u0631\u0628 \u062f\u0648\u0645 \u0686\u0646\u062f \u0634\u06cc\u0634\u0647 \u0628\u062f\u0648\u0646 \u0631\u0646\u06af \u062f\u0627\u0631\u0647\u061f", + "choices": null, + "answer": "12", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 376, + "img_width": 564, + "language": "persian", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "ParsVQA-Caps", + "split": "testmini", + "task": "visual question answering" + }, + "206": { + "question_id": "206", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the scale factor from $Q$ to $Q'$.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5", + "choices": [ + "2", + "3", + "4", + "5" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 611, + "img_width": 731, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "208": { + "question_id": "208", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between the leftmost and the rigtmost person? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 195, + "img_width": 300, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "210": { + "question_id": "210", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 370, + "img_width": 493, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "212": { + "question_id": "212", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 403, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "214": { + "question_id": "214", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of amount earned from merchandise imports in Canada greater than the average percentage of amount earned from merchandise imports in Canada taken over all years ?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1109, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "216": { + "question_id": "216", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percentage of people like the most preferred object in the whole chart?", + "choices": null, + "answer": "90", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "218": { + "question_id": "218", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large red rubber blocks. Subtract all tiny red matte objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "220": { + "question_id": "220", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, \u2299O is the circumscribed circle of the quadrilateral ABCD, if \u2220O = 110.0, then the degree of \u2220C is ()\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 105\u00b0\n(D) 90\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "105\u00b0", + "90\u00b0" + ], + "answer": "125\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 128, + "img_width": 124, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "222": { + "question_id": "222", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue shiny spheres. Subtract all big blue shiny cubes. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "224": { + "question_id": "224", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a periodic function?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "226": { + "question_id": "226", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past three.\nChoices:\n(A) half\n(B) quarter\n(C) o'clock\n(D) quarter to\n(E) quarter past", + "choices": [ + "half", + "quarter", + "o'clock", + "quarter to", + "quarter past" + ], + "answer": "quarter", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "half", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 95, + "img_width": 95, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "228": { + "question_id": "228", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of circle O, DB and DC are respectively tangent to circle O at points B and C. If \u2220ACE = 25.0, then the degree of \u2220D is ()\nChoices:\n(A) 50\u00b0\n(B) 55\u00b0\n(C) 60\u00b0\n(D) 65\u00b0", + "choices": [ + "50\u00b0", + "55\u00b0", + "60\u00b0", + "65\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 137, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "230": { + "question_id": "230", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy higher than 9 in at least one dataset?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "232": { + "question_id": "232", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below is a model of two solutions. Each pink ball represents one particle of solute. Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A", + "choices": [ + "neither; their concentrations are the same", + "Solution B", + "Solution A" + ], + "answer": "Solution B", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "neither; their concentrations are the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 251, + "img_width": 378, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "234": { + "question_id": "234", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown above, AC = 6. What is the length of segment AB?\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 7\n(E) It cannot be determined from the information given", + "choices": [ + "3", + "5", + "6", + "7", + "It cannot be determined from the information given" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 378, + "img_width": 434, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "236": { + "question_id": "236", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $z$.\nChoices:\n(A) 7\n(B) 9\n(C) 12\n(D) 15", + "choices": [ + "7", + "9", + "12", + "15" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "7", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 423, + "img_width": 447, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "238": { + "question_id": "238", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find PT\nChoices:\n(A) 6\n(B) \\frac { 20 } { 3 }\n(C) 7\n(D) 22 / 3", + "choices": [ + "6", + "\\frac { 20 } { 3 }", + "7", + "22 / 3" + ], + "answer": "\\frac { 20 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 250, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "240": { + "question_id": "240", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2387, + "img_width": 3500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "242": { + "question_id": "242", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle A$ of quadrilateral ABCD\nChoices:\n(A) 45\n(B) 90\n(C) 135\n(D) 180", + "choices": [ + "45", + "90", + "135", + "180" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 381, + "img_width": 621, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "244": { + "question_id": "244", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Aqua have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 500, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "246": { + "question_id": "246", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Assume that all gases are perfect and that data refer to 298 K unless otherwise stated. In 1995, the Intergovernmental Panel on Climate Change (IPCC) considered a global average temperature rise of $1.0-3.5^{\\circ} \\mathrm{C}$ likely by the year 2100 , with $2.0^{\\circ} \\mathrm{C}$ its best estimate. Because water vapour is itself a greenhouse gas, the increase in water vapour content of the atmosphere is of some concern to climate change experts. Predict the relative increase in water vapour in the atmosphere based on a temperature rises of $2.0 \\mathrm{~K}$, assuming that the relative humidity remains constant. (The present global mean temperature is $290 \\mathrm{~K}$, and the equilibrium vapour pressure of water at that temperature is 0.0189 bar.)", + "choices": null, + "answer": "13", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 216, + "img_width": 1098, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "248": { + "question_id": "248", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of green matte choppers greater than the number of large yellow shiny motorbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "250": { + "question_id": "250", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The area $A$ of the shaded region is given. Find $x$. $A = 66$ cm$^2$ .\nChoices:\n(A) 4.6\n(B) 6.5\n(C) 13.0\n(D) 26.0", + "choices": [ + "4.6", + "6.5", + "13.0", + "26.0" + ], + "answer": "13.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4.6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 286, + "img_width": 303, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "252": { + "question_id": "252", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Consider the infinitely long chain of resistors shown below. What is the resistance between terminals a and b if R=1?", + "choices": null, + "answer": "0.73", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 169, + "img_width": 463, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "254": { + "question_id": "254", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of big objects that are in front of the metal fighter less than the number of things that are behind the big metallic bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "256": { + "question_id": "256", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cAD\u5e73\u5206\u2220BAC\uff0cAD\u4ea4BC\u4e8e\u70b9D\uff0cDE\u22a5AB\uff0c\u5782\u8db3\u4e3aE\uff0c\u82e5DE\uff1d3\uff0cAC\uff1d4\uff0c\u5219\u25b3ADC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6", + "choices": [ + "3", + "4", + "5", + "6" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "258": { + "question_id": "258", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: An employee at the craft store counted the number of red buttons in each bag of mixed buttons. How many bags had at least 60 red buttons but fewer than 81 red buttons?'", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 156, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "260": { + "question_id": "260", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the derivative of the function positive between [1, 2] assuming that it's differentiable?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 368, + "img_width": 412, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "262": { + "question_id": "262", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between genres of tv shows watched by highest female and lowest female?", + "choices": null, + "answer": "39", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 756, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "264": { + "question_id": "264", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For Group C, in which week is the cumulative increase in weight , the highest?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "266": { + "question_id": "266", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has the most uneven shape?\nChoices:\n(A) oblique\n(B) obtuse\n(C) cordate\n(D) truncate", + "choices": [ + "oblique", + "obtuse", + "cordate", + "truncate" + ], + "answer": "oblique", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "oblique", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 225, + "img_width": 240, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "268": { + "question_id": "268", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Colton wants to buy 1+3/10 kilograms of English muffins. How much will he spend? (Unit: $)", + "choices": null, + "answer": "10.4", + "extraction": "10.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 273, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "270": { + "question_id": "270", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A and B are three points on \u2299O and AB = AC. Connect BO and CO, if \u2220ABC = 65.0, then the degree of \u2220BOC is ()\nChoices:\n(A) 50\u00b0\n(B) 65\u00b0\n(C) 100\u00b0\n(D) 130\u00b0", + "choices": [ + "50\u00b0", + "65\u00b0", + "100\u00b0", + "130\u00b0" + ], + "answer": "100\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "272": { + "question_id": "272", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:30\n(B) 1:30\n(C) 4:30\n(D) 5:30\n(E) 11:30", + "choices": [ + "9:30", + "1:30", + "4:30", + "5:30", + "11:30" + ], + "answer": "4:30", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9:30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 261, + "img_width": 261, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "274": { + "question_id": "274", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u3001BC\u3001CD\u3001DA\u90fd\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5df2\u77e5AD\uff1d2\uff0cBC\uff1d5\uff0c\u5219AB+CD\u7684\u503c\u662f\uff08\uff09\nChoices:\n(A) 14\n(B) 12\n(C) 9\n(D) 7", + "choices": [ + "14", + "12", + "9", + "7" + ], + "answer": "7", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "276": { + "question_id": "276", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, it is known that the radius of \u2299O is 5.0 and the chord AB = 8.0, then the distance from the center O to AB is ()\nChoices:\n(A) 1mm\n(B) 2mm\n(C) 3mm\n(D) 4mm", + "choices": [ + "1mm", + "2mm", + "3mm", + "4mm" + ], + "answer": "3mm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1mm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 102, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "278": { + "question_id": "278", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following objects, which one has the best PSNR score?\nChoices:\n(A) Lego\n(B) Mats\n(C) Mic\n(D) Ship", + "choices": [ + "Lego", + "Mats", + "Mic", + "Ship" + ], + "answer": "Mic", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Lego", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 940, + "img_width": 1478, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "280": { + "question_id": "280", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, ABCDEF is a regular hexagon, and its center is point O. What is the value of x?\nChoices:\n(A) 80\n(B) 60\n(C) 40\n(D) 30\n(E) 20", + "choices": [ + "80", + "60", + "40", + "30", + "20" + ], + "answer": "60", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 123, + "img_width": 130, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "282": { + "question_id": "282", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What percent of the sun is showing?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "284": { + "question_id": "284", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "286": { + "question_id": "286", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5c06\u4e00\u6839\u957f\u5ea6\u4e3a8cm\uff0c\u81ea\u7136\u4f38\u76f4\u7684\u5f39\u6027\u76ae\u7b4bAB\u4e24\u7aef\u56fa\u5b9a\u5728\u6c34\u5e73\u7684\u684c\u9762\u4e0a\uff0c\u7136\u540e\u628a\u76ae\u7b4b\u4e2d\u70b9C\u7ad6\u76f4\u5411\u4e0a\u62c9\u53473cm\u5230\u70b9D\uff0c\u5219\u6b64\u65f6\u8be5\u5f39\u6027\u76ae\u7b4b\u88ab\u62c9\u957f\u4e86\uff08\uff09\nChoices:\n(A) 6cm\n(B) 5cm\n(C) 4cm\n(D) 2cm", + "choices": [ + "6cm", + "5cm", + "4cm", + "2cm" + ], + "answer": "2cm", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6cm", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 250, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "288": { + "question_id": "288", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which of the following value ranges of \u03bb2 does the percentage of Attack Effectiveness begin to be lower than that of Diversity?\nChoices:\n(A) 0.0 - 0.2\n(B) 0.2 - 0.4\n(C) 0.4 - 0.6\n(D) 0.6 - 0.8\n(E) 0.8 - 1.0", + "choices": [ + "0.0 - 0.2", + "0.2 - 0.4", + "0.4 - 0.6", + "0.6 - 0.8", + "0.8 - 1.0" + ], + "answer": "0.0 - 0.2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.0 - 0.2", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 606, + "img_width": 2144, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "290": { + "question_id": "290", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5e73\u884c\u7ebfAB\uff0cCD\u88ab\u76f4\u7ebfAE\u6240\u622a\uff0e\u82e5\u22201\uff1d105\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 75\u00b0\n(B) 85\u00b0\n(C) 95\u00b0\n(D) 105\u00b0", + "choices": [ + "75\u00b0", + "85\u00b0", + "95\u00b0", + "105\u00b0" + ], + "answer": "75\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 119, + "img_width": 132, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "292": { + "question_id": "292", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rebecca Purple greater than Olive Drab?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 461, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "294": { + "question_id": "294", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: In Fig. 21-25, the particles have charges $q_1=-q_2=100 \\mathrm{nC}$ and $q_3=-q_4=200 \\mathrm{nC}$, and distance $a=$ $5.0 \\mathrm{~cm}$. What is the $x$ component of the net electrostatic force on particle 3?", + "choices": null, + "answer": "0.17", + "extraction": "-0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 293, + "img_width": 247, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "296": { + "question_id": "296", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of f(-3) is ____ the value of f(2)\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "equal to", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 776, + "img_width": 1430, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "298": { + "question_id": "298", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A decrease in rabbits would affect whose food source?\nChoices:\n(A) mountain lion\n(B) producer\n(C) decomposer\n(D) energy", + "choices": [ + "mountain lion", + "producer", + "decomposer", + "energy" + ], + "answer": "mountain lion", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "mountain lion", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 699, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "300": { + "question_id": "300", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{HK}$ and $\\overline{IG}$ are diameters of $\\odot L$. Find $m \\widehat {IHJ}$.\nChoices:\n(A) 59\n(B) 135\n(C) 270\n(D) 301", + "choices": [ + "59", + "135", + "270", + "301" + ], + "answer": "270", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "59", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 492, + "img_width": 510, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "302": { + "question_id": "302", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green curve?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a logarithmic function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 300, + "img_width": 531, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "304": { + "question_id": "304", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In the figure above, two line segments meet at a point on line l. If the value of y is equal to the square of the value of x, what is the value of y?", + "choices": null, + "answer": "100", + "extraction": "100", + "prediction": "100", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 431, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "306": { + "question_id": "306", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the bed much larger than the kitten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "308": { + "question_id": "308", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this function most likely be?\nChoices:\n(A) a polynomial\n(B) a trigonometric function\n(C) an exponential function\n(D) a logarithmic function", + "choices": [ + "a polynomial", + "a trigonometric function", + "an exponential function", + "a logarithmic function" + ], + "answer": "a trigonometric function", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a polynomial", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "310": { + "question_id": "310", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z\nChoices:\n(A) 10\n(B) \\frac { 32 } { 3 }\n(C) \\frac { 40 } { 3 }\n(D) \\frac { 50 } { 3 }", + "choices": [ + "10", + "\\frac { 32 } { 3 }", + "\\frac { 40 } { 3 }", + "\\frac { 50 } { 3 }" + ], + "answer": "\\frac { 40 } { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 218, + "img_width": 350, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "312": { + "question_id": "312", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2003, which crop cost the most per cwt?'\nChoices:\n(A) potatoes\n(B) peas\n(C) apples\n(D) canola", + "choices": [ + "potatoes", + "peas", + "apples", + "canola" + ], + "answer": "apples", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "potatoes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 204, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "314": { + "question_id": "314", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 522, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "316": { + "question_id": "316", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, given that points A, B, and C are on \u2299O, \u2220AOB = 100.0, then the degree of \u2220ACB is ()\nChoices:\n(A) 50\u00b0\n(B) 80\u00b0\n(C) 100\u00b0\n(D) 200\u00b0", + "choices": [ + "50\u00b0", + "80\u00b0", + "100\u00b0", + "200\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "50\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 105, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "318": { + "question_id": "318", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the figure. Round to the nearest tenth if necessary.\nChoices:\n(A) 191.5\n(B) 1128\n(C) 2256\n(D) 4512", + "choices": [ + "191.5", + "1128", + "2256", + "4512" + ], + "answer": "2256", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "191.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 175, + "img_width": 239, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "320": { + "question_id": "320", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0cAB\uff1d13\uff0cAC\uff1d5\uff0cD\u3001E\u5206\u522b\u662fAC\u3001AB\u7684\u4e2d\u70b9\uff0c\u5219DE\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 6.5\n(B) 6\n(C) 5.5\n(D) \\frac{\u221a{119}}{2}", + "choices": [ + "6.5", + "6", + "5.5", + "\\frac{\u221a{119}}{2}" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6.5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 90, + "img_width": 170, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "322": { + "question_id": "322", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cA\uff0cB\u4e24\u70b9\u88ab\u6c60\u5858\u9694\u5f00\uff0c\u5728AB\u5916\u9009\u4e00\u70b9C\uff0c\u4f7f\u70b9C\u80fd\u76f4\u63a5\u5230\u8fbe\u70b9A\u548c\u70b9B\uff0c\u8fde\u63a5AC\u548cBC\uff0c\u5e76\u5206\u522b\u627e\u51faAC\u548cBC\u7684\u4e2d\u70b9M\uff0cN\uff0e\u5982\u679c\u6d4b\u5f97MN\uff1d20m\uff0c\u90a3\u4e48A\uff0cB\u4e24\u70b9\u7684\u8ddd\u79bb\u662f\uff08\uff09\nChoices:\n(A) 10m\n(B) 20m\n(C) 35m\n(D) 40m", + "choices": [ + "10m", + "20m", + "35m", + "40m" + ], + "answer": "40m", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10m", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 148, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "324": { + "question_id": "324", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between highest and lowest value of dark blue bar?", + "choices": null, + "answer": "53", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 726, + "img_width": 800, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "326": { + "question_id": "326", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 170, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "328": { + "question_id": "328", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of accuracies of the algorithm candy for all the datasets?", + "choices": null, + "answer": "18", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "330": { + "question_id": "330", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny cubes. Subtract all brown balls. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "332": { + "question_id": "332", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A taxi cab driver tracked how many miles he drove each month. How many miles did the taxi cab driver drive in total in January and April? (Unit: miles)", + "choices": null, + "answer": "7873", + "extraction": "2000", + "prediction": "2000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 125, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "334": { + "question_id": "334", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metal tandem bikes in front of the small yellow metallic bicycle than metal bicycles on the left side of the large brown jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "336": { + "question_id": "336", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "338": { + "question_id": "338", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC above, AB = AC, E is the midpoint of line AB, and D is the midpoint of line AC. If AE = x and ED = 4, what is length BC?\nChoices:\n(A) 6\n(B) 8\n(C) 2*x\n(D) 4*x\n(E) 4*x^2", + "choices": [ + "6", + "8", + "2*x", + "4*x", + "4*x^2" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 167, + "img_width": 121, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "340": { + "question_id": "340", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following domains has the most number of BPE Tokens?\nChoices:\n(A) Legal \n(B) Code \n(C) Conversational \n(D) Math \n(E) Science\n(F) Books \n(G) News \n(H) Encyclopedic", + "choices": [ + "Legal ", + "Code ", + "Conversational ", + "Math ", + "Science", + "Books ", + "News ", + "Encyclopedic" + ], + "answer": "Science", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Legal ", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 1176, + "img_width": 2142, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "342": { + "question_id": "342", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, which of the following is the greatest?\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d\n(E) e", + "choices": [ + "a", + "b", + "c", + "d", + "e" + ], + "answer": "d", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 299, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "344": { + "question_id": "344", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal cars that are left of the tiny matte school bus greater than the number of tiny cyan double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "346": { + "question_id": "346", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the y-intercept of this function?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 339, + "img_width": 341, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "348": { + "question_id": "348", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are the pieces in triangle cuts?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "350": { + "question_id": "350", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 89, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "352": { + "question_id": "352", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people will fit in the smaller vehicle?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "354": { + "question_id": "354", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracies higher than 90?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "356": { + "question_id": "356", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer big motorbikes than rubber choppers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "358": { + "question_id": "358", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the cubes is the same as the unfolded cube?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 517, + "img_width": 326, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "360": { + "question_id": "360", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $\\frac{I J}{X J}=\\frac{HJ}{YJ}, m \\angle W X J=130$\r\nand $m \\angle WZG=20,$ find $m \\angle YIZ$\nChoices:\n(A) 40\n(B) 50\n(C) 65\n(D) 110", + "choices": [ + "40", + "50", + "65", + "110" + ], + "answer": "50", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 370, + "img_width": 721, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "362": { + "question_id": "362", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all cyan cylinders. Subtract all tiny purple rubber objects. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "364": { + "question_id": "364", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and points C and D are on \u2299O. If \u2220ABD = 50.0, then the degree of \u2220BCD is ()\nChoices:\n(A) 30\u00b0\n(B) 35\u00b0\n(C) 40\u00b0\n(D) 45\u00b0", + "choices": [ + "30\u00b0", + "35\u00b0", + "40\u00b0", + "45\u00b0" + ], + "answer": "40\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 114, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "366": { + "question_id": "366", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 320, + "img_width": 250, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "368": { + "question_id": "368", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow matte school buss greater than the number of big yellow metal cars?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "370": { + "question_id": "370", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram of the food web shown, if the number of ferns decrease, the supply of salmon will most likely?\nChoices:\n(A) decrease\n(B) can't tell\n(C) stay same\n(D) increase", + "choices": [ + "decrease", + "can't tell", + "stay same", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 680, + "img_width": 880, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "372": { + "question_id": "372", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small gray spheres. Subtract all cylinders. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "374": { + "question_id": "374", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms calf and ivory?", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "376": { + "question_id": "376", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte cubes. Subtract all tiny gray metal cubes. How many objects are left?", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "378": { + "question_id": "378", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAD\u662f\u25b3ABC\u7684\u4e2d\u7ebf\uff0cE\u4e3aAD\u7684\u4e2d\u70b9\uff0c\u25b3ABE\u7684\u9762\u79ef\u4e3a2\uff0c\u5219\u25b3ABC\u7684\u9762\u79ef\u4e3a\uff08\uff09\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8", + "choices": [ + "5", + "6", + "7", + "8" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 118, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "380": { + "question_id": "380", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For how many years that the percentage value over 4?", + "choices": null, + "answer": "6", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "382": { + "question_id": "382", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the building through the window at least five stories tall?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 400, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "384": { + "question_id": "384", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 495, + "img_width": 626, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "386": { + "question_id": "386", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 5\n(B) 10\n(C) 10 \\sqrt { 3 }\n(D) 20", + "choices": [ + "5", + "10", + "10 \\sqrt { 3 }", + "20" + ], + "answer": "10 \\sqrt { 3 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 247, + "img_width": 164, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "388": { + "question_id": "388", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Express the ratio of $\\tan M$ as a decimal to the nearest hundredth.\nChoices:\n(A) 0.38\n(B) 0.42\n(C) 0.92\n(D) 2.40", + "choices": [ + "0.38", + "0.42", + "0.92", + "2.40" + ], + "answer": "0.42", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.38", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 209, + "img_width": 342, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "390": { + "question_id": "390", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer jets that are left of the small brown suv than objects right of the big shiny car?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "392": { + "question_id": "392", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mr. Huffman, a P.E. teacher, wrote down how much weight each of his students could lift. How many people lifted at least 46 pounds? (Unit: people)", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 136, + "img_width": 197, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "394": { + "question_id": "394", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following environments has the least GPU days for training?\nChoices:\n(A) HomeGrid\n(B) Msgr S1\n(C) Msgr S2\n(D) Msgr S3\n(E) VLN\n(F) LangRoom", + "choices": [ + "HomeGrid", + "Msgr S1", + "Msgr S2", + "Msgr S3", + "VLN", + "LangRoom" + ], + "answer": "LangRoom", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "HomeGrid", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 858, + "img_width": 1854, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "396": { + "question_id": "396", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the algae dies then water flea population will\nChoices:\n(A) remains the same\n(B) decrease\n(C) increase\n(D) NA", + "choices": [ + "remains the same", + "decrease", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remains the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 576, + "img_width": 720, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "398": { + "question_id": "398", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 942, + "img_width": 727, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "400": { + "question_id": "400", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which Episode ID does the Retroformer attain its peak Success rate (%)?\nChoices:\n(A) 1.0\n(B) 1.5\n(C) 2.0\n(D) 2.5\n(E) 3.0\n(F) 3.5\n(G) 4.0", + "choices": [ + "1.0", + "1.5", + "2.0", + "2.5", + "3.0", + "3.5", + "4.0" + ], + "answer": "4.0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1.0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 942, + "img_width": 1196, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "402": { + "question_id": "402", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the food chain diagram below, which animal would most directly lack food if Grasshoppers get exterminated?\nChoices:\n(A) Rabbit\n(B) Deer\n(C) Frogs\n(D) Wolf", + "choices": [ + "Rabbit", + "Deer", + "Frogs", + "Wolf" + ], + "answer": "Frogs", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Rabbit", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 735, + "img_width": 909, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "404": { + "question_id": "404", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Which activity begins at 11.50 A.M.?'\nChoices:\n(A) figure skating practice\n(B) private class\n(C) adult class\n(D) children's class", + "choices": [ + "figure skating practice", + "private class", + "adult class", + "children's class" + ], + "answer": "children's class", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "figure skating practice", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 217, + "img_width": 325, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "406": { + "question_id": "406", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many snowmen are there?", + "choices": null, + "answer": "15", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 183, + "img_width": 714, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "408": { + "question_id": "408", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 6\n(B) 6 \\sqrt { 2 }\n(C) 6 \\sqrt { 3 }\n(D) 6 \\sqrt { 5 }", + "choices": [ + "6", + "6 \\sqrt { 2 }", + "6 \\sqrt { 3 }", + "6 \\sqrt { 5 }" + ], + "answer": "6 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 238, + "img_width": 362, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "410": { + "question_id": "410", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of $\\triangle D E F,$ if $\\triangle D E F \\sim \\triangle C B F,$ perimeter of $\\triangle C B F=27, D F=6,$ and $F C=8$\nChoices:\n(A) 20.25\n(B) 21\n(C) 27\n(D) 36", + "choices": [ + "20.25", + "21", + "27", + "36" + ], + "answer": "20.25", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "20.25", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 226, + "img_width": 405, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "412": { + "question_id": "412", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tanner has $35. Does he have enough to buy a black jacket and a pair of shorts?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 192, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "414": { + "question_id": "414", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If $ST=8, TR=4$, and $PT=6$, find $QR$.\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 10", + "choices": [ + "6", + "8", + "9", + "10" + ], + "answer": "9", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 386, + "img_width": 509, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "416": { + "question_id": "416", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: what is the highest volume written on the blender?", + "choices": null, + "answer": "800", + "extraction": "1000", + "prediction": "1000", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1024, + "img_width": 768, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "TextVQA", + "split": "testmini", + "task": "visual question answering" + }, + "418": { + "question_id": "418", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the number of grasshoppers decreases, what will the population of spiders most likely do?\nChoices:\n(A) remain the same\n(B) increase\n(C) decrease\n(D) NA", + "choices": [ + "remain the same", + "increase", + "decrease", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "remain the same", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "420": { + "question_id": "420", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the lowest value on the Y axis?", + "choices": null, + "answer": "0.0", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 1763, + "img_width": 2256, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "422": { + "question_id": "422", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest bar?", + "choices": null, + "answer": "10", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "424": { + "question_id": "424", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the food half eaten?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 428, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "426": { + "question_id": "426", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u82e5DE\u662f\u25b3ABC\u7684\u4e2d\u4f4d\u7ebf\uff0c\u25b3ADE\u7684\u5468\u957f\u4e3a1\uff0c\u5219\u25b3ABC\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4", + "choices": [ + "1", + "2", + "3", + "4" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 154, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "428": { + "question_id": "428", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "28", + "extraction": "30", + "prediction": "30", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 968, + "img_width": 1259, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "430": { + "question_id": "430", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The derivative of f(x) at x=0 is ____ that at x=5\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 393, + "img_width": 552, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "432": { + "question_id": "432", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of undernourished male children greater than 0.4 %?", + "choices": null, + "answer": "4", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1085, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "434": { + "question_id": "434", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, side AC of triangle ABC is on line l. What is x in terms of k?\nChoices:\n(A) 60-k\n(B) k\n(C) 60+k\n(D) 120-k\n(E) 120-2*k", + "choices": [ + "60-k", + "k", + "60+k", + "120-k", + "120-2*k" + ], + "answer": "60-k", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "60-k", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 157, + "img_width": 215, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "436": { + "question_id": "436", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many algorithms have accuracy lower than 8 in at least one dataset?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "438": { + "question_id": "438", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 367, + "img_width": 329, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "440": { + "question_id": "440", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the white plate half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "442": { + "question_id": "442", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many objects are preferred by more than 7 people in at least one category?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "444": { + "question_id": "444", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the two genders?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "446": { + "question_id": "446", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0c\u70b9D\u662f\u25b3ABC\u7684\u5185\u5fc3\uff0c\u8fde\u63a5DB\uff0cDC\uff0c\u8fc7\u70b9D\u4f5cEF\u2225BC\u5206\u522b\u4ea4AB\u3001AC\u4e8e\u70b9E\u3001F\uff0c\u82e5BE+CF\uff1d8\uff0c\u5219EF\u7684\u957f\u5ea6\u4e3a\uff08\uff09\nChoices:\n(A) 4\n(B) 5\n(C) 8\n(D) 16", + "choices": [ + "4", + "5", + "8", + "16" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "448": { + "question_id": "448", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year recorded the highest share of Urban secondary schools with access to electricity in India?", + "choices": null, + "answer": "2016", + "extraction": "2015", + "prediction": "2015", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "450": { + "question_id": "450", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the grass died, what would be most affected?\nChoices:\n(A) garter snakes\n(B) hognose snakes\n(C) hawks\n(D) grasshoppers", + "choices": [ + "garter snakes", + "hognose snakes", + "hawks", + "grasshoppers" + ], + "answer": "grasshoppers", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "garter snakes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 375, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "452": { + "question_id": "452", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, what is the most likely equilibrium population count?\nChoices:\n(A) 40\n(B) 60\n(C) 80\n(D) 100", + "choices": [ + "40", + "60", + "80", + "100" + ], + "answer": "80", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "40", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 366, + "img_width": 441, + "language": "english", + "skills": [ + "algebraic reasoning", + "statistical reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "454": { + "question_id": "454", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "456": { + "question_id": "456", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "458": { + "question_id": "458", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: If you add the two visible numbers, on the jerseys, what is the total sum?", + "choices": null, + "answer": "3", + "extraction": "23", + "prediction": "23", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 427, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "460": { + "question_id": "460", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there were fewer leaves in this ecosystem, the first organism to experience change as a result would be:\nChoices:\n(A) Frogs\n(B) Crickets\n(C) Snakes\n(D) Hawks", + "choices": [ + "Frogs", + "Crickets", + "Snakes", + "Hawks" + ], + "answer": "Crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Frogs", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 720, + "img_width": 960, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "462": { + "question_id": "462", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 100?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "464": { + "question_id": "464", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer for the missing picture.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5\n(F) 6", + "choices": [ + "1", + "2", + "3", + "4", + "5", + "6" + ], + "answer": "4", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 1316, + "img_width": 1000, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "466": { + "question_id": "466", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Periwinkle intersect Yellow Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 487, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "468": { + "question_id": "468", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people prefer the most preferred object?", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "470": { + "question_id": "470", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following models has the lowest KS Rollout Loss overall?\nChoices:\n(A) Baseline\n(B) Diffusion\n(C) PDE-Refiner\n(D) Pushforward", + "choices": [ + "Baseline", + "Diffusion", + "PDE-Refiner", + "Pushforward" + ], + "answer": "PDE-Refiner", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Baseline", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "line plot", + "grade": "college", + "img_height": 854, + "img_width": 1422, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "472": { + "question_id": "472", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "474": { + "question_id": "474", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many miles per gallon do an average city bus get?", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "476": { + "question_id": "476", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If frogs were removed from this environment what animal would potentially see an increase in its population?\nChoices:\n(A) crickets\n(B) deer\n(C) snakes\n(D) hawks", + "choices": [ + "crickets", + "deer", + "snakes", + "hawks" + ], + "answer": "crickets", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "crickets", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 405, + "img_width": 518, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "478": { + "question_id": "478", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the diamond ABCD, two diagonal lines AC = 12.0, BD = 16.0, then the edge length of this diamond is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 5", + "choices": [ + "10", + "8", + "6", + "5" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 97, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "480": { + "question_id": "480", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny blue metal bicycles behind the small sedan less than the number of purple fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "482": { + "question_id": "482", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, triangle ABC is inscribed in the circle with center O and diameter AC. If AB = AO, what is the degree measure of angle ABO?\nChoices:\n(A) 15*\\degree\n(B) 30*\\degree\n(C) 45*\\degree\n(D) 60*\\degree\n(E) 90*\\degree", + "choices": [ + "15*\\degree", + "30*\\degree", + "45*\\degree", + "60*\\degree", + "90*\\degree" + ], + "answer": "60*\\degree", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15*\\degree", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 134, + "img_width": 143, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "484": { + "question_id": "484", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "486": { + "question_id": "486", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0cAB\uff1d5\uff0cAD\uff1d7\uff0c\u5219ABCD\u7684\u5468\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 12\n(B) 14\n(C) 35\n(D) 24", + "choices": [ + "12", + "14", + "35", + "24" + ], + "answer": "24", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "12", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 156, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "488": { + "question_id": "488", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown things. Subtract all tiny blue metallic objects. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "490": { + "question_id": "490", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u70b9A\u3001C\u3001B\u5728\u540c\u4e00\u76f4\u7ebf\u4e0a\uff0cDC\u22a5EC\uff0c\u82e5\u2220BCD\uff1d40\u00b0\uff0c\u5219\u2220ACE\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 30\u00b0\n(B) 40\u00b0\n(C) 50\u00b0\n(D) 60\u00b0", + "choices": [ + "30\u00b0", + "40\u00b0", + "50\u00b0", + "60\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 88, + "img_width": 155, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "492": { + "question_id": "492", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the \u2299O with a radius of 2.0, C is a point on the extended line of the diameter AB, CD is tangent to the circle at point D. Connect AD, given that \u2220DAC = 30.0, the length of the line segment CD is ()\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 2\u221a{3}", + "choices": [ + "1", + "\u221a{3}", + "2", + "2\u221a{3}" + ], + "answer": "2\u221a{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 158, + "img_width": 203, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "494": { + "question_id": "494", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 97, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "496": { + "question_id": "496", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "20", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "498": { + "question_id": "498", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the water half full?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 478, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "500": { + "question_id": "500", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "3", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1236, + "img_width": 987, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "502": { + "question_id": "502", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tandem bikes that are behind the brown metal bicycle than matte trucks on the left side of the green object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "504": { + "question_id": "504", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, D and E are the points on the edges AB and AC of \u25b3ABC, DE \u2225 BC, if AD:DB=1.0:3.0, AE = 2.0, then the length of AC is ()\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4", + "choices": [ + "10", + "8", + "6", + "4" + ], + "answer": "8", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 86, + "img_width": 117, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "506": { + "question_id": "506", + "query": "Hint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?", + "choices": null, + "answer": "[2014, 2016]", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "true_false": false, + "question_type": "free_form", + "answer_type": "list", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "508": { + "question_id": "508", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The owner of a bed and breakfast inn recalled how many guests the inn had hosted each day. What is the median of the numbers?'", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 241, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "510": { + "question_id": "510", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, AC = 4.0, AB = 5.0, then the value of sinB is ()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{5}\n(C) \\frac{3}{4}\n(D) \\frac{4}{5}", + "choices": [ + "\\frac{2}{3}", + "\\frac{3}{5}", + "\\frac{3}{4}", + "\\frac{4}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{2}{3}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 186, + "img_width": 119, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "512": { + "question_id": "512", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the y coordinate of the center of mass of the isosceles right triangle of uniform areal density shown in Figure 9-C?", + "choices": null, + "answer": "0.24", + "extraction": "0.5", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 356, + "img_width": 497, + "language": "english", + "skills": [ + "geometry reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "514": { + "question_id": "514", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you wanted the leaf with the least main veins, which would you choose?\nChoices:\n(A) 3 main veins\n(B) pinnate\n(C) reticulate\n(D) palmate", + "choices": [ + "3 main veins", + "pinnate", + "reticulate", + "palmate" + ], + "answer": "3 main veins", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3 main veins", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 236, + "img_width": 559, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "516": { + "question_id": "516", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are most the stepping stones square?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 339, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "518": { + "question_id": "518", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2211, + "img_width": 2838, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "520": { + "question_id": "520", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Magenta have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 741, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "522": { + "question_id": "522", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 86, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "524": { + "question_id": "524", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The Kingwood Ski Resort asked its guests how many times they went sledding last winter. How many guests went sledding more than 2 times?'", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 163, + "img_width": 351, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "526": { + "question_id": "526", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn", + "choices": [ + "slide", + "flip", + "turn" + ], + "answer": "slide", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "slide", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 104, + "img_width": 253, + "language": "english", + "skills": [ + "geometry reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "528": { + "question_id": "528", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cAB\u2225CD\uff0cBD\u22a5CF\uff0c\u5782\u8db3\u4e3aB\uff0c\u2220ABF\uff1d35\u00b0\uff0c\u5219\u2220BDC\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 25\u00b0\n(B) 35\u00b0\n(C) 45\u00b0\n(D) 55\u00b0", + "choices": [ + "25\u00b0", + "35\u00b0", + "45\u00b0", + "55\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "25\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 135, + "img_width": 194, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "530": { + "question_id": "530", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The advertising agency counted the number of billboards in each city in the state. How many cities have fewer than 70 billboards? (Unit: cities)", + "choices": null, + "answer": "9", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 180, + "img_width": 140, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "532": { + "question_id": "532", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer gray trucks that are in front of the large aeroplane than big yellow metal objects in front of the purple object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "534": { + "question_id": "534", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the percentage of stunted female children greater than the average percentage of stunted female children taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 883, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "536": { + "question_id": "536", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, points A, B, and C are on \u2299O, if \u2220C = 35.0, then \u2220AOB = ()\nChoices:\n(A) 17.5\u00b0\n(B) 35\u00b0\n(C) 60\u00b0\n(D) 70\u00b0", + "choices": [ + "17.5\u00b0", + "35\u00b0", + "60\u00b0", + "70\u00b0" + ], + "answer": "70\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "17.5\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "538": { + "question_id": "538", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in the two concentric circles, the chord AB of the great circle is tangent to the small circle at point C. If AB = 6.0, the area of \u200b\u200bthe ring is ()\nChoices:\n(A) 9\u03c0\n(B) 6\u03c0\n(C) 3\u03c0\n(D) \u03c0", + "choices": [ + "9\u03c0", + "6\u03c0", + "3\u03c0", + "\u03c0" + ], + "answer": "9\u03c0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "9\u03c0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 115, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "540": { + "question_id": "540", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5", + "choices": [ + "3/11", + "8/11", + "6/11", + "3/5" + ], + "answer": "3/11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3/11", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 103, + "img_width": 102, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "542": { + "question_id": "542", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many models in the figure achieve an Acc score greater than 60?", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scatter plot", + "grade": "college", + "img_height": 1358, + "img_width": 1690, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "544": { + "question_id": "544", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total percentage of people who say that they do either less or more often than the usual amount of exercise during the coronavirus pandemic in the United States as of April 2020?", + "choices": null, + "answer": "44", + "extraction": "77", + "prediction": "77", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "546": { + "question_id": "546", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the overall ratio of male to female?", + "choices": null, + "answer": "1", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "548": { + "question_id": "548", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer cyan jets than big buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "550": { + "question_id": "550", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the accuracy of the algorithm with highest accuracy?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "552": { + "question_id": "552", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many queries have a p-value lower than 0.50?", + "choices": null, + "answer": "4", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "college", + "img_height": 330, + "img_width": 1726, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "PaperQA", + "split": "testmini", + "task": "figure question answering" + }, + "554": { + "question_id": "554", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Burlywood the minimum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 488, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "556": { + "question_id": "556", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large red metallic things that are on the left side of the cyan shiny scooter than things that are in front of the small jet?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "558": { + "question_id": "558", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "560": { + "question_id": "560", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 514, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "562": { + "question_id": "562", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green cubes. Subtract all large cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "564": { + "question_id": "564", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the difference between the highest and the lowest time required to import ?", + "choices": null, + "answer": "4", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 1056, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "566": { + "question_id": "566", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u25b3ABC\u224c\u25b3DEF\uff0cCD\u5e73\u5206\u2220BCA\uff0c\u82e5\u2220A\uff1d22\u00b0\uff0c\u2220CGF\uff1d88\u00b0\uff0c\u5219\u2220E\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 26\u00b0\n(B) 28\u00b0\n(C) 30\u00b0\n(D) 34\u00b0", + "choices": [ + "26\u00b0", + "28\u00b0", + "30\u00b0", + "34\u00b0" + ], + "answer": "26\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "26\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 89, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "568": { + "question_id": "568", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: For an economics project, Colleen determined the cost of ferry rides for bicycles and cars. How much higher is the fare for a car on the Mukilteu-Clinton ferry than on the Southport-Fort Fisher ferry? (Unit: $)", + "choices": null, + "answer": "2", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 349, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "570": { + "question_id": "570", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all purple matte blocks. Subtract all brown things. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "4", + "prediction": "4", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "572": { + "question_id": "572", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function start decreasing?", + "choices": null, + "answer": "5", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 316, + "img_width": 400, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "574": { + "question_id": "574", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do you see the figures inside these boxes? They form a pattern. Choose the figure in the answer row below that continues the pattern.\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4\n(E) 5", + "choices": [ + "1", + "2", + "3", + "4", + "5" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 378, + "img_width": 868, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "576": { + "question_id": "576", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the human brain is the largest and most anterior part of each cerebral hemisphere?\nChoices:\n(A) motor cortex\n(B) occipital lobe\n(C) temporal lobe\n(D) frontal lobe", + "choices": [ + "motor cortex", + "occipital lobe", + "temporal lobe", + "frontal lobe" + ], + "answer": "frontal lobe", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "motor cortex", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 625, + "img_width": 768, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "578": { + "question_id": "578", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "9567", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 285, + "img_width": 637, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "580": { + "question_id": "580", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Slate the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 650, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "582": { + "question_id": "582", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Green greater than Rebecca Purple?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "584": { + "question_id": "584", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A philanthropic organization compared the amounts of money that its members donated to certain causes. Who donated more money to arts education, Aubrey or Connor?'\nChoices:\n(A) Connor\n(B) Aubrey", + "choices": [ + "Connor", + "Aubrey" + ], + "answer": "Connor", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Connor", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 187, + "img_width": 391, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "586": { + "question_id": "586", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220BAC = 90.0, rotate \u25b3ABC clockwise around point A by 90.0 to obtain \u25b3AB\u2032C\u2032 (the corresponding point of point B is point B\u2032, and the corresponding point of point C is point C \u2032), connect CC\u2032. If \u2220CC\u2032B\u2032 = 32.0, then the size of \u2220AC\u2032B\u2032 is ()\nChoices:\n(A) 32\u00b0\n(B) 45\u00b0\n(C) 13\u00b0\n(D) 30\u00b0", + "choices": [ + "32\u00b0", + "45\u00b0", + "13\u00b0", + "30\u00b0" + ], + "answer": "13\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "32\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 75, + "img_width": 80, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "588": { + "question_id": "588", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which year has more actual total income?", + "choices": null, + "answer": "1982", + "extraction": "1972", + "prediction": "1972", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2281, + "img_width": 1785, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "590": { + "question_id": "590", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "13", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 264, + "img_width": 376, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "592": { + "question_id": "592", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the global maximum of this function?", + "choices": null, + "answer": "4", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 318, + "img_width": 283, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "594": { + "question_id": "594", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the expenditure per student in Jamaica have the greatest increase?", + "choices": null, + "answer": "2005", + "extraction": "2005", + "prediction": "2005", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "596": { + "question_id": "596", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dodger Blue the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 407, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "598": { + "question_id": "598", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most curved beak species?\nChoices:\n(A) iiki\n(B) swallow-tanager\n(C) cliff swallow\n(D) hawfinch", + "choices": [ + "iiki", + "swallow-tanager", + "cliff swallow", + "hawfinch" + ], + "answer": "iiki", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "iiki", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 463, + "img_width": 593, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "600": { + "question_id": "600", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the answer.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 637, + "img_width": 424, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "602": { + "question_id": "602", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Rectangle ABCD is subdivided into two identical square regions, as in the figure above. If the area of each square is 9, what is the perimeter of ABCD?", + "choices": null, + "answer": "18", + "extraction": "24", + "prediction": "24", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 219, + "img_width": 435, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "604": { + "question_id": "604", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the roughest?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "606": { + "question_id": "606", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the rectangle?", + "choices": null, + "answer": "10", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 209, + "img_width": 335, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "608": { + "question_id": "608", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does South Carolina have the highest value in the South ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "610": { + "question_id": "610", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, P, Q, and R lie on the same line. P is the center of the larger circle, and Q is the center of the smaller circle. If the radius of the larger circle is 4, what is the radius of the smaller circle?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 8\n(E) 16", + "choices": [ + "1", + "2", + "4", + "8", + "16" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 353, + "img_width": 411, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "612": { + "question_id": "612", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal things. Subtract all tiny objects. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "614": { + "question_id": "614", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 661, + "img_width": 915, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "616": { + "question_id": "616", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the ratio of instagram to google?", + "choices": null, + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "618": { + "question_id": "618", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid the maximum?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "pie chart", + "grade": "daily life", + "img_height": 400, + "img_width": 532, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "620": { + "question_id": "620", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 199, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "622": { + "question_id": "622", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cD\u662fBC\u4e0a\u7684\u70b9\uff0c\u4e14BD\uff1d2\uff0cDC\uff1d1\uff0cS\u25b3ACD\uff1d12\uff0c\u90a3\u4e48S\u25b3ABC\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 30\n(B) 36\n(C) 72\n(D) 24", + "choices": [ + "30", + "36", + "72", + "24" + ], + "answer": "36", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "30", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 92, + "img_width": 146, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "624": { + "question_id": "624", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the total unemployed labor force in Upper middle income greater than 1.6 %?", + "choices": null, + "answer": "5", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1344, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "626": { + "question_id": "626", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown objects. Subtract all large purple cylinders. How many objects are left?", + "choices": null, + "answer": "1", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "628": { + "question_id": "628", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728ABCD\u4e2d\uff0c\u2220ABC\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9E\uff0c\u2220BCD\u7684\u5e73\u5206\u7ebf\u4ea4AD\u4e8e\u70b9F\uff0c\u82e5AB\uff1d3\uff0cAD\uff1d4\uff0c\u5219EF\u7684\u957f\u662f\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2.5\n(D) 3", + "choices": [ + "1", + "2", + "2.5", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 111, + "img_width": 151, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "630": { + "question_id": "630", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Find the size of angle MBD in the figure below.", + "choices": null, + "answer": "72", + "extraction": "66", + "prediction": "66", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 195, + "img_width": 340, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "TheoremQA", + "split": "testmini", + "task": "textbook question answering" + }, + "632": { + "question_id": "632", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the total value of the More bar?", + "choices": null, + "answer": "52", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 350, + "img_width": 309, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "634": { + "question_id": "634", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u76f4\u7ebfAB\uff0cCD\u4ea4\u4e8e\u70b9O\uff0e\u5c04\u7ebfOE\u5e73\u5206\u2220BOC\uff0c\u82e5\u2220AOD\uff1d70\u00b0\uff0c\u5219\u2220AOE\u7b49\u4e8e\uff08\uff09\nChoices:\n(A) 35\u00b0\n(B) 110\u00b0\n(C) 135\u00b0\n(D) 145\u00b0", + "choices": [ + "35\u00b0", + "110\u00b0", + "135\u00b0", + "145\u00b0" + ], + "answer": "145\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 173, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "636": { + "question_id": "636", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What number is shown?", + "choices": null, + "answer": "34", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 117, + "img_width": 92, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "638": { + "question_id": "638", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the under-5 male mortality rate greater than the average under-5 male mortality rate taken over all years ?", + "choices": null, + "answer": "1", + "extraction": "1", + "prediction": "1", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 650, + "img_width": 880, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "640": { + "question_id": "640", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $\\widehat{\\mathrm{WN}}$ if $\\triangle \\mathrm{IWN}$ is equilateral and $W N=5$\nChoices:\n(A) \\frac { 3 } { 5 } \\pi\n(B) \\frac { 5 } { 3 } \\pi\n(C) 5 \\pi\n(D) 10 \\pi", + "choices": [ + "\\frac { 3 } { 5 } \\pi", + "\\frac { 5 } { 3 } \\pi", + "5 \\pi", + "10 \\pi" + ], + "answer": "\\frac { 5 } { 3 } \\pi", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac { 3 } { 5 } \\pi", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 222, + "img_width": 309, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "642": { + "question_id": "642", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Line AB is tangent to circle O. If AB = 8 and OB = 10, find the diameter of the circle.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10\n(E) 12", + "choices": [ + "4", + "6", + "8", + "10", + "12" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 443, + "img_width": 347, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "644": { + "question_id": "644", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the missing number in the picture?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 11", + "choices": [ + "6", + "8", + "10", + "11" + ], + "answer": "6", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 452, + "img_width": 494, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "646": { + "question_id": "646", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The employee at the department store counted the number of ties on each tie rack. How many racks have at least 0 ties? (Unit: racks)", + "choices": null, + "answer": "25", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 224, + "img_width": 131, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "648": { + "question_id": "648", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the minimum value of this function?", + "choices": null, + "answer": "-1", + "extraction": "-3", + "prediction": "-3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 296, + "img_width": 600, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "650": { + "question_id": "650", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the sum of maximum employment rate and minimum employment?", + "choices": null, + "answer": "31.3", + "extraction": "10.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "652": { + "question_id": "652", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 365, + "img_width": 845, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "654": { + "question_id": "654", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer yellow metallic motorbikes that are in front of the small brown metal dirtbike than big yellow dirtbikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "656": { + "question_id": "656", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Web Maroon the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 776, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "658": { + "question_id": "658", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 115, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "660": { + "question_id": "660", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer small fighters than yellow matte tandem bikes?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "662": { + "question_id": "662", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?", + "choices": null, + "answer": "80", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "664": { + "question_id": "664", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number of responses for Question 10, for any given % of inside sales?", + "choices": null, + "answer": "17", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2245, + "img_width": 1692, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "666": { + "question_id": "666", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all red objects. Subtract all big green things. How many objects are left?", + "choices": null, + "answer": "4", + "extraction": "4", + "prediction": "4", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "668": { + "question_id": "668", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 560, + "img_width": 775, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "670": { + "question_id": "670", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which date of Meeting was the most number of shares transferred?\nChoices:\n(A) 04/06/2005\n(B) 04/02/2005\n(C) 04/05/2005\n(D) 04/03/2005\n(E) 04/04/2005", + "choices": [ + "04/06/2005", + "04/02/2005", + "04/05/2005", + "04/03/2005", + "04/04/2005" + ], + "answer": "04/02/2005", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "04/06/2005", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2135, + "img_width": 1582, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "672": { + "question_id": "672", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "2", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 169, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "674": { + "question_id": "674", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, CDE is an equilateral triangle and ABCE is a square with an area of 1. What is the perimeter of polygon ABCDE?\nChoices:\n(A) 4\n(B) 5\n(C) 6\n(D) 7\n(E) 8", + "choices": [ + "4", + "5", + "6", + "7", + "8" + ], + "answer": "5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 89, + "img_width": 125, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "676": { + "question_id": "676", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "678": { + "question_id": "678", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x\nChoices:\n(A) 21\n(B) 34\n(C) 58\n(D) 67", + "choices": [ + "21", + "34", + "58", + "67" + ], + "answer": "58", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "21", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 149, + "img_width": 267, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "680": { + "question_id": "680", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "5", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 303, + "img_width": 440, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "682": { + "question_id": "682", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all the grass dies then population of squirrel will\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA", + "choices": [ + "decrease", + "remains the same", + "increase", + "NA" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 592, + "img_width": 864, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "684": { + "question_id": "684", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $\\overline{CH} \\cong \\overline{KJ}$. Find $x$.\nChoices:\n(A) 27\n(B) 54\n(C) 55\n(D) 83", + "choices": [ + "27", + "54", + "55", + "83" + ], + "answer": "55", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "27", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 444, + "img_width": 608, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "686": { + "question_id": "686", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function invertible?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 442, + "img_width": 731, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "688": { + "question_id": "688", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the minimum age group shown in the \u2018plots\u2019?\nChoices:\n(A) 11-15\n(B) 21-25\n(C) 6-10\n(D) 16-20\n(E) 0-5", + "choices": [ + "11-15", + "21-25", + "6-10", + "16-20", + "0-5" + ], + "answer": "0-5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "11-15", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2136, + "img_width": 3160, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "690": { + "question_id": "690", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, lines M and N are parallel. All of the following are true except\nChoices:\n(A) a + b = j + l\n(B) g = h\n(C) c + f = f + b\n(D) g + e + f + h = 360\n(E) d + e = f + j", + "choices": [ + "a + b = j + l", + "g = h", + "c + f = f + b", + "g + e + f + h = 360", + "d + e = f + j" + ], + "answer": "d + e = f + j", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "a + b = j + l", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 558, + "img_width": 625, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "692": { + "question_id": "692", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain if grasses dried up in summer, what is likely to happen?\nChoices:\n(A) Grasshoppers will decrease.\n(B) shrews will become extinct\n(C) owls will increase.\n(D) None of the above", + "choices": [ + "Grasshoppers will decrease.", + "shrews will become extinct", + "owls will increase.", + "None of the above" + ], + "answer": "Grasshoppers will decrease.", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshoppers will decrease.", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 189, + "img_width": 600, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "694": { + "question_id": "694", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u83f1\u5f62ABCD\u4e2d\uff0cM\u3001N\u5206\u522b\u662fBC\u548cCD\u7684\u4e2d\u70b9\uff0cNP\u22a5AB\u4e8e\u70b9P\uff0c\u8fde\u63a5MP\uff0e\u82e5\u2220DAB\uff1d40\u00b0\uff0c\u5219\u2220MPB\uff1d\uff08\uff09\nChoices:\n(A) 125\u00b0\n(B) 120\u00b0\n(C) 115\u00b0\n(D) 110\u00b0", + "choices": [ + "125\u00b0", + "120\u00b0", + "115\u00b0", + "110\u00b0" + ], + "answer": "110\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "125\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 85, + "img_width": 158, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "696": { + "question_id": "696", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Erica has $1,525.00. Does she have enough to buy a motorcycle and a canoe?'\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 214, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "698": { + "question_id": "698", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle in the figure above, what is the value of x?\nChoices:\n(A) 2*\\sqrt{3}\n(B) 6*\\sqrt{2}\n(C) 6*\\sqrt{3}\n(D) 6\n(E) 12", + "choices": [ + "2*\\sqrt{3}", + "6*\\sqrt{2}", + "6*\\sqrt{3}", + "6", + "12" + ], + "answer": "2*\\sqrt{3}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2*\\sqrt{3}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 376, + "img_width": 615, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "700": { + "question_id": "700", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u2299O\u662f\u25b3ABC\u7684\u5916\u63a5\u5706\uff0cAB\uff1dBC\uff1d4\uff0c\u628a\u5f27AB\u6cbf\u5f26AB\u5411\u4e0b\u6298\u53e0\u4ea4BC\u4e8e\u70b9D\uff0c\u82e5\u70b9D\u4e3aBC\u4e2d\u70b9\uff0c\u5219AC\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) 2\n(C) 2\u221a{2}\n(D) \u221a{6}", + "choices": [ + "1", + "2", + "2\u221a{2}", + "\u221a{6}" + ], + "answer": "2\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 132, + "img_width": 144, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "702": { + "question_id": "702", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is cumulative increase in weight ( in grams) for \"GROUP A\" in third week ( give an approximate value) ?", + "choices": null, + "answer": "400", + "extraction": "100", + "prediction": "100", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "document image", + "grade": "daily life", + "img_height": 2237, + "img_width": 1754, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DocVQA", + "split": "testmini", + "task": "figure question answering" + }, + "704": { + "question_id": "704", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two puzzle pieces form the larger square?\nChoices:\n(A) 1 & 2\n(B) 1 & 3\n(C) 1 & 4\n(D) 2 & 3\n(E) 2 & 4", + "choices": [ + "1 & 2", + "1 & 3", + "1 & 4", + "2 & 3", + "2 & 4" + ], + "answer": "1 & 3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1 & 2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 440, + "img_width": 396, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "706": { + "question_id": "706", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the image of the dot (8,-2) under a clockwise rotation by 270\u00b0 about the origin.\"\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D\n(E) E", + "choices": [ + "A", + "B", + "C", + "D", + "E" + ], + "answer": "C", + "extraction": "B", + "prediction": "B", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 432, + "img_width": 438, + "language": "english", + "skills": [ + "logical reasoning", + "geometry reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "708": { + "question_id": "708", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, the light source P is directly above the crossbar AB, the shadow of AB under the light is CD, AB \u2225 CD, AB = 2.0, CD = 5.0, the distance between point P and CD is 3.0, then the distance between AB and CD is ().\nChoices:\n(A) \\frac{6}{5}\n(B) \\frac{7}{6}\n(C) \\frac{9}{5}\n(D) \\frac{15}{2}", + "choices": [ + "\\frac{6}{5}", + "\\frac{7}{6}", + "\\frac{9}{5}", + "\\frac{15}{2}" + ], + "answer": "\\frac{9}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{6}{5}", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 110, + "img_width": 156, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "710": { + "question_id": "710", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1555, + "img_width": 2293, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "712": { + "question_id": "712", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "9", + "extraction": "12", + "prediction": "12", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 244, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "714": { + "question_id": "714", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of large brown rubber motorbikes in front of the big motorbike greater than the number of big green sedans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "716": { + "question_id": "716", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 16 \\sqrt { 2 }\n(B) 16 \\sqrt { 3 }\n(C) 32\n(D) 16 \\sqrt { 5 }", + "choices": [ + "16 \\sqrt { 2 }", + "16 \\sqrt { 3 }", + "32", + "16 \\sqrt { 5 }" + ], + "answer": "16 \\sqrt { 5 }", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16 \\sqrt { 2 }", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 196, + "img_width": 427, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "718": { + "question_id": "718", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: Jeffrey is the proud owner of an eclectic bow tie collection. He keeps track of how many bow ties he has, and organizes them by pattern and material. What is the probability that a randomly selected bow tie is designed with swirls and is made of velvet? Simplify any fractions.'", + "choices": null, + "answer": "0.21", + "extraction": "0.33", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 94, + "img_width": 215, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "720": { + "question_id": "720", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: When does the function value first reach 2?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 350, + "img_width": 362, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "722": { + "question_id": "722", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Sky Blue the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "724": { + "question_id": "724", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Rebecca Purple have the minimum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 638, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "726": { + "question_id": "726", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 10\n(B) 30\n(C) 90\n(D) 120", + "choices": [ + "10", + "30", + "90", + "120" + ], + "answer": "10", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 228, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "728": { + "question_id": "728", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 69, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "730": { + "question_id": "730", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In which year the market share of KLA is highest?", + "choices": null, + "answer": "2019", + "extraction": "2013", + "prediction": "2013", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 557, + "img_width": 800, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "732": { + "question_id": "732", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism would be most affected if there was a shortage of plants?\nChoices:\n(A) Grasshopper\n(B) Snake\n(C) Mouse\n(D) Hawk", + "choices": [ + "Grasshopper", + "Snake", + "Mouse", + "Hawk" + ], + "answer": "Grasshopper", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Grasshopper", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1080, + "img_width": 1152, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "734": { + "question_id": "734", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer double buss that are behind the aeroplane than things on the left side of the yellow double bus?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "736": { + "question_id": "736", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5df2\u77e5\u76f4\u7ebfa\u2225b\uff0c\u76f4\u89d2\u4e09\u89d2\u5f62ABC\u4e2d\uff0c\u2220C\uff1d90\u00b0\uff0c\u82e5\u2220B\uff1d58\u00b0\uff0c\u90a3\u4e48\u22201\ufe63\u22202\uff1d\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 58\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "58\u00b0" + ], + "answer": "32\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 154, + "img_width": 226, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "738": { + "question_id": "738", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this function continuous?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 268, + "img_width": 383, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "740": { + "question_id": "740", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What percent of the stands are full?\nChoices:\n(A) 15\n(B) 100\n(C) 50\n(D) 50", + "choices": [ + "15", + "100", + "50", + "50" + ], + "answer": "15", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "15", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 375, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "742": { + "question_id": "742", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the twig to the nearest inch. The twig is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 159, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "744": { + "question_id": "744", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If RL = 5, RT = 9, and WS = 6, find RW.\nChoices:\n(A) 5.4\n(B) 6\n(C) 6.6\n(D) 7.5", + "choices": [ + "5.4", + "6", + "6.6", + "7.5" + ], + "answer": "7.5", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "5.4", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 199, + "img_width": 404, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "746": { + "question_id": "746", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Mrs. Zimmerman hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between 2013 and 2014? (Unit: entries per year)", + "choices": null, + "answer": "7", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 199, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "748": { + "question_id": "748", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, PA and PB are tangents of \u2299O, the tangent point of point A and B, AC is the diameter of \u2299O, given that \u2220P = 50.0, then the size of \u2220ACB is ()\nChoices:\n(A) 65\u00b0\n(B) 60\u00b0\n(C) 55\u00b0\n(D) 50\u00b0", + "choices": [ + "65\u00b0", + "60\u00b0", + "55\u00b0", + "50\u00b0" + ], + "answer": "65\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "65\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 117, + "img_width": 207, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "750": { + "question_id": "750", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "18", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 356, + "img_width": 290, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "752": { + "question_id": "752", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cPA\u662f\u2299O\u7684\u5207\u7ebf\uff0c\u5207\u70b9\u4e3aA\uff0cOP\uff1d4\uff0c\u2220APO\uff1d30\u00b0\uff0c\u5219\u2299O\u7684\u534a\u5f84\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 1\n(B) \u221a{3}\n(C) 2\n(D) 3", + "choices": [ + "1", + "\u221a{3}", + "2", + "3" + ], + "answer": "2", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "1", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 87, + "img_width": 122, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "754": { + "question_id": "754", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the diagram below, which shows a partial food web. What will happen to fish population if algae's are decreased?\nChoices:\n(A) Population will decrease\n(B) Population will remain the same\n(C) Population will increase\n(D) None of the above", + "choices": [ + "Population will decrease", + "Population will remain the same", + "Population will increase", + "None of the above" + ], + "answer": "Population will decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Population will decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 364, + "img_width": 464, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "756": { + "question_id": "756", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the trees died, the population of porcupine would most likely\nChoices:\n(A) double\n(B) skyrocket\n(C) decrease\n(D) increase", + "choices": [ + "double", + "skyrocket", + "decrease", + "increase" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "double", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 591, + "img_width": 765, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "758": { + "question_id": "758", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of tiny purple trucks behind the small matte motorbike less than the number of fighters that are behind the big metal utility bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "760": { + "question_id": "760", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow tandem bikes less than the number of big objects?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "762": { + "question_id": "762", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the center of symmetry of this function?\nChoices:\n(A) (0, 0)\n(B) (-1, 0)\n(C) (2, 0)", + "choices": [ + "(0, 0)", + "(-1, 0)", + "(2, 0)" + ], + "answer": "(0, 0)", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "(0, 0)", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "764": { + "question_id": "764", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the average number of bananas on each stock?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 349, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "766": { + "question_id": "766", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer tiny red trucks than small blue bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "768": { + "question_id": "768", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the graph to answer the question below. Which month is the hottest on average in Rome?\nChoices:\n(A) December, January, and February\n(B) July and August\n(C) March and April", + "choices": [ + "December, January, and February", + "July and August", + "March and April" + ], + "answer": "July and August", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "December, January, and February", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "elementary school", + "img_height": 323, + "img_width": 448, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "ScienceQA", + "split": "testmini", + "task": "textbook question answering" + }, + "770": { + "question_id": "770", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the amplitude of this function?", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 276, + "img_width": 482, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "772": { + "question_id": "772", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of small yellow shiny motorbikes greater than the number of red rubber fighters?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "774": { + "question_id": "774", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there fewer large matte utility bikes than small yellow bicycles?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "776": { + "question_id": "776", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $JQ$ if $Q$ is the incenter of $\\triangle JLN$. Rounded to the nearest hundredth.\nChoices:\n(A) 16.50\n(B) 18.79\n(C) 20.32\n(D) 25.50", + "choices": [ + "16.50", + "18.79", + "20.32", + "25.50" + ], + "answer": "18.79", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "16.50", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 424, + "img_width": 589, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "778": { + "question_id": "778", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you find the missing shape in this picture puzzle?\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "A", + "extraction": "D", + "prediction": "D", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 431, + "img_width": 797, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "780": { + "question_id": "780", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "7", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 209, + "img_width": 848, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "782": { + "question_id": "782", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "4", + "extraction": "18", + "prediction": "18", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 376, + "img_width": 384, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "784": { + "question_id": "784", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Across all years, what is the maximum rating of statistical capacity in Maldives ?", + "choices": null, + "answer": "70", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 938, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "786": { + "question_id": "786", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find $m \\angle K$\nChoices:\n(A) 6\n(B) 60\n(C) 100\n(D) 180", + "choices": [ + "6", + "60", + "100", + "180" + ], + "answer": "100", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 237, + "img_width": 317, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "788": { + "question_id": "788", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 332, + "img_width": 515, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "790": { + "question_id": "790", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u25b3ABC\u4e2d\uff0cN\u662fBC\u8fb9\u4e0a\u7684\u4e2d\u70b9\uff0cAM\u5e73\u5206\u2220BAC\uff0cBM\u22a5AM\u4e8e\u70b9M\uff0c\u82e5AB\uff1d8\uff0cMN\uff1d2\uff0e\u5219AC\u7684\u957f\u4e3a\uff08\uff09\nChoices:\n(A) 10\n(B) 11\n(C) 12\n(D) 13", + "choices": [ + "10", + "11", + "12", + "13" + ], + "answer": "12", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 105, + "img_width": 145, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "792": { + "question_id": "792", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "7", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 2624, + "img_width": 3936, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "794": { + "question_id": "794", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many bars have values larger than 4?", + "choices": null, + "answer": "2", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "796": { + "question_id": "796", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "3", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1938, + "img_width": 2516, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "798": { + "question_id": "798", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, l || m. Which of the following must equal 180?\nChoices:\n(A) k + n + r\n(B) k + p + s\n(C) n + p + s\n(D) n + p + t\n(E) r + s + t", + "choices": [ + "k + n + r", + "k + p + s", + "n + p + s", + "n + p + t", + "r + s + t" + ], + "answer": "k + p + s", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "k + n + r", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 372, + "img_width": 371, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "800": { + "question_id": "800", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Orchid intersect Forest Green?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "no", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 596, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "802": { + "question_id": "802", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Karen bought 4 pounds of silk scraps and 4 pounds of canvas scraps. How much did she spend? (Unit: $)", + "choices": null, + "answer": "69", + "extraction": "36", + "prediction": "36", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 194, + "img_width": 243, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "804": { + "question_id": "804", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot B$, $CE=13.5$. Find $BD$. Round to the nearest hundredth.\nChoices:\n(A) 3.71\n(B) 4.29\n(C) 4.53\n(D) 6.75", + "choices": [ + "3.71", + "4.29", + "4.53", + "6.75" + ], + "answer": "4.29", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3.71", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 524, + "img_width": 493, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "806": { + "question_id": "806", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, AB is the diameter of \u2299O, and point C is on \u2299O. If \u2220A = 40.0, then the degree of \u2220B is ()\nChoices:\n(A) 80\u00b0\n(B) 60\u00b0\n(C) 50\u00b0\n(D) 40\u00b0", + "choices": [ + "80\u00b0", + "60\u00b0", + "50\u00b0", + "40\u00b0" + ], + "answer": "50\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "80\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 107, + "img_width": 127, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "808": { + "question_id": "808", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all large purple spheres. Subtract all small gray things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "810": { + "question_id": "810", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all yellow metallic balls. Subtract all small yellow shiny things. How many objects are left?", + "choices": null, + "answer": "8", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "812": { + "question_id": "812", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the gray bar always have smaller value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 1286, + "img_width": 840, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "814": { + "question_id": "814", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?", + "choices": null, + "answer": "100000000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "816": { + "question_id": "816", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Round to the nearest tenth, if necessary.\nChoices:\n(A) 3\n(B) 9\n(C) 12.25\n(D) 24", + "choices": [ + "3", + "9", + "12.25", + "24" + ], + "answer": "3", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "3", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 272, + "img_width": 379, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "818": { + "question_id": "818", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the ratio of least value of light brown graph and leftmost value of dark brown graph?", + "choices": null, + "answer": "0.32", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 434, + "img_width": 310, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "820": { + "question_id": "820", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: $a=14, b=48,$ and $c=50$ find $cosA$\nChoices:\n(A) 0.14\n(B) 0.48\n(C) 0.50\n(D) 0.96", + "choices": [ + "0.14", + "0.48", + "0.50", + "0.96" + ], + "answer": "0.96", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "0.14", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 160, + "img_width": 238, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "822": { + "question_id": "822", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 22\n(B) 40\n(C) 44\n(D) 48", + "choices": [ + "22", + "40", + "44", + "48" + ], + "answer": "44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "22", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 227, + "img_width": 356, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "824": { + "question_id": "824", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)", + "choices": null, + "answer": "0.13", + "extraction": "0.97", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 192, + "img_width": 247, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "826": { + "question_id": "826", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the largest part of the lung?\nChoices:\n(A) Inferior lobes\n(B) Cardiac notch\n(C) Superior lobes\n(D) Middle lobe", + "choices": [ + "Inferior lobes", + "Cardiac notch", + "Superior lobes", + "Middle lobe" + ], + "answer": "Superior lobes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Inferior lobes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 479, + "img_width": 638, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "828": { + "question_id": "828", + "query": "Hint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: Linda wants to buy 0.9 pounds of double chocolate cookie dough. How much will she spend? (Unit: $)", + "choices": null, + "answer": "2.7", + "extraction": "3.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 1.0, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 194, + "img_width": 357, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "830": { + "question_id": "830", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "2", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 870, + "img_width": 1024, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "832": { + "question_id": "832", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "-2", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 1920, + "img_width": 1920, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "834": { + "question_id": "834", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Georgia , does Florida have the lowest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 610, + "img_width": 785, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "836": { + "question_id": "836", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest species shown?\nChoices:\n(A) chinlea\n(B) arganodus\n(C) semionotus\n(D) xenacanthus", + "choices": [ + "chinlea", + "arganodus", + "semionotus", + "xenacanthus" + ], + "answer": "semionotus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "chinlea", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1076, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "838": { + "question_id": "838", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1200, + "img_width": 1600, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "840": { + "question_id": "840", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which item can you get the most protein?\nChoices:\n(A) salami\n(B) wine\n(C) cheese\n(D) bread", + "choices": [ + "salami", + "wine", + "cheese", + "bread" + ], + "answer": "salami", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "salami", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 500, + "img_width": 375, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "842": { + "question_id": "842", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At a certain moment, there is a passenger ship at sea point P, and lighthouse A is measured in the direction 30.0 north by east of P, and is 50.0 nautical miles away. The passenger ship sails at the speed of 60.0 nautical mile/hour in the direction of 60.0 from north by west for $\\frac{2.0}{3.0}$hours to reach point B, then tan\u2220BAP = ()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{6}{5}\n(C) \\frac{\u221a{5}}{5}\n(D) \\frac{2\u221a{5}}{5}", + "choices": [ + "\\frac{4}{5}", + "\\frac{6}{5}", + "\\frac{\u221a{5}}{5}", + "\\frac{2\u221a{5}}{5}" + ], + "answer": "\\frac{4}{5}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "\\frac{4}{5}", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 115, + "img_width": 154, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "844": { + "question_id": "844", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the larger window shaped like the smaller window?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "abstract scene", + "grade": "daily life", + "img_height": 400, + "img_width": 700, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "VQA-AS", + "split": "testmini", + "task": "visual question answering" + }, + "846": { + "question_id": "846", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown the high median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 758, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "848": { + "question_id": "848", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: In how many years, is the tuberculosis treatment success rate in Bulgaria greater than the average tuberculosis treatment success rate in Bulgaria taken over all years ?", + "choices": null, + "answer": "3", + "extraction": "1", + "prediction": "1", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 700, + "img_width": 1091, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "PlotQA", + "split": "testmini", + "task": "figure question answering" + }, + "850": { + "question_id": "850", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of cars in front of the tiny metal thing less than the number of large matte things in front of the cyan rubber road bike?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "852": { + "question_id": "852", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?", + "choices": null, + "answer": "40", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 598, + "img_width": 612, + "language": "english", + "skills": [ + "logical reasoning", + "arithmetic reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "854": { + "question_id": "854", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the pelicans in the community were eradicated, which population feel the most direct effect?\nChoices:\n(A) Plant\n(B) Phyto-plankton\n(C) Fish\n(D) Lizard", + "choices": [ + "Plant", + "Phyto-plankton", + "Fish", + "Lizard" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 947, + "img_width": 850, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "856": { + "question_id": "856", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture has the least leaves?\nChoices:\n(A) Both\n(B) Compound\n(C) Simple\n(D) Neither", + "choices": [ + "Both", + "Compound", + "Simple", + "Neither" + ], + "answer": "Simple", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Both", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 300, + "img_width": 400, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "858": { + "question_id": "858", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the basis of the given food web, which organism will increase in number if there were no seals?\nChoices:\n(A) Shark\n(B) Small Shrimp\n(C) Octopus\n(D) Mysid Shrimp", + "choices": [ + "Shark", + "Small Shrimp", + "Octopus", + "Mysid Shrimp" + ], + "answer": "Octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Shark", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 764, + "img_width": 1162, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "860": { + "question_id": "860", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Miss Foley ran a sit-up competition among her P.E. students and monitored how many sit-ups each students could do. What is the largest number of sit-ups done? (Unit: sit-ups)", + "choices": null, + "answer": "86", + "extraction": "256", + "prediction": "256", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 246, + "img_width": 291, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "862": { + "question_id": "862", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: One of the most dramatic videos on the web (but entirely fictitious) supposedly shows a man sliding along a long water slide and then being launched into the air to land in a water pool. Let's attach some reasonable numbers to such a flight to calculate the velocity with which the man would have hit the water. Figure indicates the launch and landing sites and includes a superimposed coordinate system with its origin conveniently located at the launch site. From the video we take the horizontal flight distance as $D=20.0 \\mathrm{~m}$, the flight time as $t=2.50 \\mathrm{~s}$, and the launch angle as $\\theta_0=40.0^{\\circ}$. Find the magnitude of the velocity at launch and at landing.", + "choices": null, + "answer": "10.44", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "scientific figure", + "grade": "college", + "img_height": 600, + "img_width": 1302, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning", + "scientific reasoning" + ], + "source": "SciBench", + "split": "testmini", + "task": "textbook question answering" + }, + "864": { + "question_id": "864", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1738, + "img_width": 2480, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "866": { + "question_id": "866", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For trapezoid $Q R S T, A$ and $B$ are midpoints of the legs. Find $m \\angle S$\nChoices:\n(A) 45\n(B) 60\n(C) 120\n(D) 135", + "choices": [ + "45", + "60", + "120", + "135" + ], + "answer": "135", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 169, + "img_width": 359, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "868": { + "question_id": "868", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big green cylinders. Subtract all rubber cylinders. How many objects are left?", + "choices": null, + "answer": "5", + "extraction": "5", + "prediction": "5", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "870": { + "question_id": "870", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more tiny motorbikes in front of the small cyan tandem bike than big cyan metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "872": { + "question_id": "872", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the next shape.\nChoices:\n(A) A\n(B) B\n(C) C\n(D) D", + "choices": [ + "A", + "B", + "C", + "D" + ], + "answer": "D", + "extraction": "D", + "prediction": "D", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 496, + "img_width": 1472, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "874": { + "question_id": "874", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of y at x=-2.5?", + "choices": null, + "answer": "2", + "extraction": "-2", + "prediction": "-2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 479, + "img_width": 479, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "876": { + "question_id": "876", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, square $ABDC$ is inscribed in $\\odot K$. Find the measure of a central angle.\nChoices:\n(A) 45\n(B) 60\n(C) 90\n(D) 180", + "choices": [ + "45", + "60", + "90", + "180" + ], + "answer": "90", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 275, + "img_width": 273, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "878": { + "question_id": "878", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728Rt\u25b3ABC\u4e2d\uff0c\u2220ACB\uff1d90\u00b0\uff0c\u4ee5Rt\u25b3ABC\u7684\u4e09\u8fb9\u4e3a\u8fb9\u5411\u5916\u4f5c\u6b63\u65b9\u5f62\uff0c\u5176\u9762\u79ef\u5206\u522b\u4e3aS1\uff0cS2\uff0cS3\uff0c\u4e14S1\uff1d5\uff0cS3\uff1d16\uff0c\u5219S2\uff1d\uff08\uff09\nChoices:\n(A) 6\n(B) 2\u221a{2}\n(C) 11\n(D) 24", + "choices": [ + "6", + "2\u221a{2}", + "11", + "24" + ], + "answer": "11", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "6", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 82, + "img_width": 94, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "880": { + "question_id": "880", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What's the total add up value of largest and smallest bar?", + "choices": null, + "answer": "252.65", + "extraction": "12.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "882": { + "question_id": "882", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lawn Green the low median?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 400, + "img_width": 677, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "884": { + "question_id": "884", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the blue kite in the lower right corner shaped like?\nChoices:\n(A) ferret\n(B) cat\n(C) cloud\n(D) octopus", + "choices": [ + "ferret", + "cat", + "cloud", + "octopus" + ], + "answer": "octopus", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "ferret", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "numeric commonsense", + "geometry reasoning" + ], + "source": "A-OKVQA", + "split": "testmini", + "task": "visual question answering" + }, + "886": { + "question_id": "886", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: A newspaper researched how many grocery stores there are in each town. What is the median of the numbers?'", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 218, + "img_width": 235, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "888": { + "question_id": "888", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small green shiny balls. Subtract all small metallic things. How many objects are left?", + "choices": null, + "answer": "3", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "890": { + "question_id": "890", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is larger the moon or the sun?\nChoices:\n(A) Sun\n(B) It varies\n(C) They are equal in size\n(D) Moon", + "choices": [ + "Sun", + "It varies", + "They are equal in size", + "Moon" + ], + "answer": "Sun", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Sun", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 844, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "892": { + "question_id": "892", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does New Jersey have a higher value than Georgia ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "894": { + "question_id": "894", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the sum of the accuracies of the algorithms fat and acre?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "896": { + "question_id": "896", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately, what percentage of jewelry sales in January were Rings?\nChoices:\n(A) Around 21%\n(B) Around 27%\n(C) Around 31%\n(D) Around 37%", + "choices": [ + "Around 21%", + "Around 27%", + "Around 31%", + "Around 37%" + ], + "answer": "Around 31%", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Around 21%", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "bar chart", + "grade": "elementary school", + "img_height": 464, + "img_width": 758, + "language": "english", + "skills": [ + "logical reasoning", + "statistical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "898": { + "question_id": "898", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, A, B, and C are the three points on \u2299O, if \u2220C = 35.0, then the degree of \u2220OAB is ()\nChoices:\n(A) 35\u00b0\n(B) 55\u00b0\n(C) 65\u00b0\n(D) 70\u00b0", + "choices": [ + "35\u00b0", + "55\u00b0", + "65\u00b0", + "70\u00b0" + ], + "answer": "55\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "35\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 109, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "900": { + "question_id": "900", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of rubber cars less than the number of brown jets?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "Yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "902": { + "question_id": "902", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the leaf base has an angle greater than 90 degrees, what is it called?\nChoices:\n(A) obtuse\n(B) decurrent\n(C) cuneate\n(D) acute", + "choices": [ + "obtuse", + "decurrent", + "cuneate", + "acute" + ], + "answer": "obtuse", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "obtuse", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 1429, + "img_width": 1500, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "TQA", + "split": "testmini", + "task": "textbook question answering" + }, + "904": { + "question_id": "904", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Fill in the blank to describe the model. The model has 9 dots divided into 3 equal groups. There are (_) dots in each group.", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 204, + "img_width": 633, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "906": { + "question_id": "906", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the sum of smallest two value is greater then then largest value?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 600, + "img_width": 850, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "ChartQA", + "split": "testmini", + "task": "figure question answering" + }, + "908": { + "question_id": "908", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which organism would most likely have a decrease in its population if decrease the population of ant base of above diagram?\nChoices:\n(A) plant\n(B) human\n(C) lizard\n(D) snake", + "choices": [ + "plant", + "human", + "lizard", + "snake" + ], + "answer": "lizard", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "plant", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 497, + "img_width": 312, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "910": { + "question_id": "910", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue metal balls. Subtract all large matte things. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "912": { + "question_id": "912", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "4", + "extraction": "2", + "prediction": "2", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 413, + "img_width": 629, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "914": { + "question_id": "914", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all tiny purple shiny cubes. Subtract all large purple balls. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "916": { + "question_id": "916", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As shown in the figure, in Rt\u25b3ABC, \u2220C = 90.0, \u2220A = 30.0, BC = 2.0, the radius of \u2299C is 1.0, point P is the point on the hypotenuse AB, passing point P is a tangent PQ of \u2299C (Point Q is the tangent point), then the minimum value of the line segment PQ is ()\nChoices:\n(A) 2\n(B) \u221a{3}\n(C) \u221a{2}\n(D) 2-\\frac{\u221a{3}}{3}", + "choices": [ + "2", + "\u221a{3}", + "\u221a{2}", + "2-\\frac{\u221a{3}}{3}" + ], + "answer": "\u221a{2}", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "2", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 145, + "img_width": 112, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "UniGeo", + "split": "testmini", + "task": "geometry problem solving" + }, + "918": { + "question_id": "918", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Calculate the missing item.", + "choices": null, + "answer": "1", + "extraction": "13", + "prediction": "13", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 492, + "img_width": 538, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "920": { + "question_id": "920", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: The measure of angle BAC equals x*\\degree. What is the value of x?", + "choices": null, + "answer": "30", + "extraction": "3", + "prediction": "3", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 310, + "img_width": 388, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GEOS", + "split": "testmini", + "task": "geometry problem solving" + }, + "922": { + "question_id": "922", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the smallest individual element in the whole chart?", + "choices": null, + "answer": "1", + "extraction": "0", + "prediction": "0", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "924": { + "question_id": "924", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Periwinkle have the maximum area under the curve?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scatter plot", + "grade": "daily life", + "img_height": 400, + "img_width": 587, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "926": { + "question_id": "926", + "query": "Hint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: What is the size of the shaded area under the curve? Round the answer to 2 decimal places", + "choices": null, + "answer": "7.07", + "extraction": "0.0", + "prediction": null, + "true_false": false, + "question_type": "free_form", + "answer_type": "float", + "precision": 2.0, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 312, + "img_width": 433, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "928": { + "question_id": "928", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How much more does a navy blue bath mat cost than a yellow bath towel? (Unit: $)", + "choices": null, + "answer": "5", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 160, + "img_width": 234, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "930": { + "question_id": "930", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0cF\u662f\u25b3ABC\u7684\u89d2\u5e73\u5206\u7ebfCD\u548cBE\u7684\u4ea4\u70b9\uff0cCG\u22a5AB\u4e8e\u70b9G\uff0e\u82e5\u2220ACG\uff1d32\u00b0\uff0c\u5219\u2220BFC\u7684\u5ea6\u6570\u662f\uff08\uff09\nChoices:\n(A) 119\u00b0\n(B) 122\u00b0\n(C) 148\u00b0\n(D) 150\u00b0", + "choices": [ + "119\u00b0", + "122\u00b0", + "148\u00b0", + "150\u00b0" + ], + "answer": "119\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "119\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 79, + "img_width": 113, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "932": { + "question_id": "932", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would happen to the phytoplankton if krill increased?\nChoices:\n(A) decrease\n(B) increase\n(C) can't be predicted\n(D) stay the same", + "choices": [ + "decrease", + "increase", + "can't be predicted", + "stay the same" + ], + "answer": "decrease", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "decrease", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 350, + "img_width": 750, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "934": { + "question_id": "934", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of the largest bar?", + "choices": null, + "answer": "10000", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "bar chart", + "grade": "daily life", + "img_height": 448, + "img_width": 448, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "DVQA", + "split": "testmini", + "task": "figure question answering" + }, + "936": { + "question_id": "936", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: How many people in the image were born after the end of World War II?", + "choices": null, + "answer": "0", + "extraction": "0", + "prediction": "0", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 892, + "img_width": 710, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "938": { + "question_id": "938", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, $m \u22209 = 75$. Find the measure of $\\angle 6$.\nChoices:\n(A) 75\n(B) 85\n(C) 95\n(D) 105", + "choices": [ + "75", + "85", + "95", + "105" + ], + "answer": "105", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "75", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 278, + "img_width": 417, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "940": { + "question_id": "940", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big red things. Subtract all metallic things. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "942": { + "question_id": "942", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the value of f(0)?", + "choices": null, + "answer": "0", + "extraction": "-10", + "prediction": "-10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 395, + "img_width": 500, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "944": { + "question_id": "944", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the perimeter of the shape?", + "choices": null, + "answer": "3", + "extraction": "3", + "prediction": "3", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "elementary school", + "img_height": 241, + "img_width": 258, + "language": "english", + "skills": [ + "geometry reasoning", + "arithmetic reasoning" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "946": { + "question_id": "946", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "16", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 373, + "img_width": 560, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "948": { + "question_id": "948", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Some students compared how many blocks they live from school. What is the mean of the numbers?'", + "choices": null, + "answer": "11", + "extraction": "14", + "prediction": "14", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "elementary school", + "img_height": 311, + "img_width": 207, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "950": { + "question_id": "950", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The slope of f(x) at x=0 is ____\nChoices:\n(A) positive\n(B) negative\n(C) zero\n(D) undefined", + "choices": [ + "positive", + "negative", + "zero", + "undefined" + ], + "answer": "positive", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "positive", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 744, + "img_width": 1114, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "952": { + "question_id": "952", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Base your answers on the food web below and on your knowledge of biology. A decrease in the Aquatic crustaceans population will most immediately decrease the available energy for the\nChoices:\n(A) Minnows\n(B) Ducks\n(C) Fish\n(D) Raccoons", + "choices": [ + "Minnows", + "Ducks", + "Fish", + "Raccoons" + ], + "answer": "Fish", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Minnows", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 258, + "img_width": 456, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "954": { + "question_id": "954", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A partial food web is shown below. Which of the following will most likely happen if the snake population decreases?\nChoices:\n(A) Cricket will increase\n(B) Mouse will increase\n(C) Rabbit will increase\n(D) All of above", + "choices": [ + "Cricket will increase", + "Mouse will increase", + "Rabbit will increase", + "All of above" + ], + "answer": "All of above", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Cricket will increase", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "scientific figure", + "grade": "high school", + "img_height": 277, + "img_width": 475, + "language": "english", + "skills": [ + "scientific reasoning" + ], + "source": "AI2D", + "split": "testmini", + "task": "textbook question answering" + }, + "956": { + "question_id": "956", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all small blue rubber objects. Subtract all brown shiny balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "6", + "prediction": "6", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "958": { + "question_id": "958", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Choose the missing letters from below to form a word, using all letters presented\nChoices:\n(A) A, R, N\n(B) R, D, N\n(C) I, A, M\n(D) H, O, W", + "choices": [ + "A, R, N", + "R, D, N", + "I, A, M", + "H, O, W" + ], + "answer": "R, D, N", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "A, R, N", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "puzzle test", + "grade": "elementary school", + "img_height": 773, + "img_width": 945, + "language": "english", + "skills": [ + "logical reasoning" + ], + "source": "IQTest", + "split": "testmini", + "task": "figure question answering" + }, + "960": { + "question_id": "960", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "4", + "extraction": "20", + "prediction": "20", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 1365, + "img_width": 2048, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "962": { + "question_id": "962", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The value of y at x=10 is ____ that at x=70.\nChoices:\n(A) larger than\n(B) equal to\n(C) smaller than", + "choices": [ + "larger than", + "equal to", + "smaller than" + ], + "answer": "smaller than", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "larger than", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "function plot", + "grade": "college", + "img_height": 301, + "img_width": 387, + "language": "english", + "skills": [ + "algebraic reasoning" + ], + "source": "FunctionQA", + "split": "testmini", + "task": "textbook question answering" + }, + "964": { + "question_id": "964", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long.", + "choices": null, + "answer": "6", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 70, + "img_width": 342, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "966": { + "question_id": "966", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Move the ruler to measure the length of the pencil to the nearest inch. The pencil is about (_) inches long.", + "choices": null, + "answer": "3", + "extraction": "7", + "prediction": "7", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "abstract scene", + "grade": "elementary school", + "img_height": 166, + "img_width": 438, + "language": "english", + "skills": [ + "numeric commonsense" + ], + "source": "IconQA", + "split": "testmini", + "task": "math word problem" + }, + "968": { + "question_id": "968", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all blue balls. Subtract all big yellow rubber balls. How many objects are left?", + "choices": null, + "answer": "6", + "extraction": "5", + "prediction": "5", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "970": { + "question_id": "970", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u4e24\u76f4\u7ebfa\uff0cb\u88ab\u76f4\u7ebfc\u6240\u622a\uff0c\u5df2\u77e5a\u2225b\uff0c\u22201\uff1d62\u00b0\uff0c\u5219\u22202\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 62\u00b0\n(B) 108\u00b0\n(C) 118\u00b0\n(D) 128\u00b0", + "choices": [ + "62\u00b0", + "108\u00b0", + "118\u00b0", + "128\u00b0" + ], + "answer": "118\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "62\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 141, + "img_width": 135, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "972": { + "question_id": "972", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of yellow shiny utility bikes greater than the number of brown metallic cruisers?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "974": { + "question_id": "974", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there the same number of big blue trucks and large purple metal double buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "976": { + "question_id": "976", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of metal biplanes behind the purple shiny object less than the number of purple school buss behind the big red object?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "978": { + "question_id": "978", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Allie kept a written log of how many miles she biked during the past 7 days. What is the range of the numbers?'", + "choices": null, + "answer": "7", + "extraction": "8", + "prediction": "8", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "table", + "grade": "high school", + "img_height": 280, + "img_width": 230, + "language": "english", + "skills": [ + "arithmetic reasoning", + "statistical reasoning" + ], + "source": "TabMWP", + "split": "testmini", + "task": "math word problem" + }, + "980": { + "question_id": "980", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the highest number shown?", + "choices": null, + "answer": "12", + "extraction": "12", + "prediction": "12", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 640, + "img_width": 429, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "VQA2.0", + "split": "testmini", + "task": "visual question answering" + }, + "982": { + "question_id": "982", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states that border Wyoming , does South Dakota have the highest value ?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "map chart", + "grade": "high school", + "img_height": 500, + "img_width": 700, + "language": "english", + "skills": [ + "scientific reasoning", + "statistical reasoning" + ], + "source": "MapQA", + "split": "testmini", + "task": "figure question answering" + }, + "984": { + "question_id": "984", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of gray cars less than the number of small metallic minivans?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "986": { + "question_id": "986", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff0c\u5728\u25b3ABC\u4e2d\uff0cAD\u662f\u89d2\u5e73\u5206\u7ebf\uff0cAE\u662f\u9ad8\uff0e\u82e5\u2220B\uff1d40\u00b0\uff0c\u2220C\uff1d70\u00b0\uff0c\u5219\u2220EAD\u7684\u5ea6\u6570\u4e3a\uff08\uff09\nChoices:\n(A) 10\u00b0\n(B) 15\u00b0\n(C) 17.5\u00b0\n(D) 20\u00b0", + "choices": [ + "10\u00b0", + "15\u00b0", + "17.5\u00b0", + "20\u00b0" + ], + "answer": "15\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "10\u00b0", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 68, + "img_width": 101, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "988": { + "question_id": "988", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: What is the age gap between these two people in image? (Unit: years)", + "choices": null, + "answer": "1", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "general-vqa", + "context": "natural image", + "grade": "daily life", + "img_height": 333, + "img_width": 500, + "language": "english", + "skills": [ + "numeric commonsense", + "arithmetic reasoning" + ], + "source": "KVQA", + "split": "testmini", + "task": "visual question answering" + }, + "990": { + "question_id": "990", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In $\\odot S$, $m \\widehat {PQR}=98$, Find $m \\widehat {PQ}$.\nChoices:\n(A) 45\n(B) 49\n(C) 90\n(D) 98", + "choices": [ + "45", + "49", + "90", + "98" + ], + "answer": "49", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "45", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 452, + "img_width": 544, + "language": "english", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "Geometry3K", + "split": "testmini", + "task": "geometry problem solving" + }, + "992": { + "question_id": "992", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of purple metallic things that are behind the small green motorbike less than the number of blue metal articulated buss?\nChoices:\n(A) Yes\n(B) No", + "choices": [ + "Yes", + "No" + ], + "answer": "No", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "Yes", + "true_false": false, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "synthetic scene", + "grade": "daily life", + "img_height": 480, + "img_width": 640, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "Super-CLEVR", + "split": "testmini", + "task": "visual question answering" + }, + "994": { + "question_id": "994", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta greater than Web Maroon?\nChoices:\n(A) yes\n(B) no", + "choices": [ + "yes", + "no" + ], + "answer": "yes", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "yes", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "general-vqa", + "context": "line plot", + "grade": "daily life", + "img_height": 400, + "img_width": 548, + "language": "english", + "skills": [ + "statistical reasoning" + ], + "source": "FigureQA", + "split": "testmini", + "task": "figure question answering" + }, + "996": { + "question_id": "996", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all big shiny balls. Subtract all blue rubber blocks. How many objects are left?", + "choices": null, + "answer": "2", + "extraction": "2", + "prediction": "2", + "true_false": true, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + }, + "998": { + "question_id": "998", + "query": "Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \u5982\u56fe\uff1a\u2220AOB\uff1a\u2220BOC\uff1a\u2220COD\uff1d2\uff1a3\uff1a4\uff0c\u5c04\u7ebfOM\u3001ON\uff0c\u5206\u522b\u5e73\u5206\u2220AOB\u4e0e\u2220COD\uff0c\u53c8\u2220MON\uff1d84\u00b0\uff0c\u5219\u2220AOB\u4e3a\uff08\uff09\nChoices:\n(A) 28\u00b0\n(B) 30\u00b0\n(C) 32\u00b0\n(D) 38\u00b0", + "choices": [ + "28\u00b0", + "30\u00b0", + "32\u00b0", + "38\u00b0" + ], + "answer": "28\u00b0", + "extraction": "\nPlease read the following example. Then extract the answer from the model response and type it at the end of the prompt.\n\nHint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Which number is missing?\n\nModel response: The number missing in the sequence is 14.\n\nExtracted answer: 14\n\nHint: Please answer the question requiring a floating-point number with one decimal place and provide the final value, e.g., 1.2, 1.3, 1.4, at the end.\nQuestion: What is the fraction of females facing the camera?\n\nModel response: The fraction of females facing the camera is 0.6, which means that six out of ten females in the group are facing the camera.\n\nExtracted answer: 0.6\n\nHint: Please answer the question requiring a floating-point number with two decimal places and provide the final value, e.g., 1.23, 1.34, 1.45, at the end.\nQuestion: How much money does Luca need to buy a sour apple candy and a butterscotch candy? (Unit: $)\n\nModel response: Luca needs $1.45 to buy a sour apple candy and a butterscotch candy.\n\nExtracted answer: 1.45\n\nHint: Please answer the question requiring a Python list as an answer and provide the final list, e.g., [1, 2, 3], [1.2, 1.3, 1.4], at the end.\nQuestion: Between which two years does the line graph saw its maximum peak?\n\nModel response: The line graph saw its maximum peak between 2007 and 2008.\n\nExtracted answer: [2007, 2008]\n\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is blue?\nChoices:\n(A) 3/11\n(B) 8/11\n(C) 6/11\n(D) 3/5\n\nModel response: The correct answer is (B) 8/11.\n\nExtracted answer: B\n", + "prediction": "28\u00b0", + "true_false": true, + "question_type": "multi_choice", + "answer_type": "text", + "precision": null, + "category": "math-targeted-vqa", + "context": "geometry diagram", + "grade": "high school", + "img_height": 118, + "img_width": 181, + "language": "chinese", + "skills": [ + "geometry reasoning", + "algebraic reasoning" + ], + "source": "GeoQA+", + "split": "testmini", + "task": "geometry problem solving" + }, + "1000": { + "question_id": "1000", + "query": "Hint: Please answer the question requiring an integer answer and provide the final value, e.g., 1, 2, 3, at the end.\nQuestion: Subtract all brown matte cylinders. Subtract all big purple matte things. How many objects are left?", + "choices": null, + "answer": "9", + "extraction": "10", + "prediction": "10", + "true_false": false, + "question_type": "free_form", + "answer_type": "integer", + "precision": null, + "category": "math-targeted-vqa", + "context": "synthetic scene", + "grade": "elementary school", + "img_height": 240, + "img_width": 320, + "language": "english", + "skills": [ + "arithmetic reasoning" + ], + "source": "CLEVR-Math", + "split": "testmini", + "task": "math word problem" + } +} \ No newline at end of file